[Top][All Lists]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Getfem-commits] (no subject)

From: Konstantinos Poulios
Subject: [Getfem-commits] (no subject)
Date: Wed, 27 Dec 2023 10:15:42 -0500 (EST)

branch: overhaul-build-system
commit 4ff66802b9ffb055b6c4805f4c3b573ecf217fd3
Author: Konstantinos Poulios <>
AuthorDate: Wed Dec 27 16:15:24 2023 +0100

    Remove old copy of SuperLU, overhaul, treat MUMPS and SuperLU 
      - Old SuperLU code removed from sources
      - Fixes the SuperLU interface to work with modern releases of SuperLU
      - Unused and duplicate preprocessor definitions removed from config
      - Removes the header-free interface to SuperLU in GetFEM and the files
        "getfem_superlu.cc" and "getfem_superlu.h". Now, the Gmm header
        "gmm_superlu_interface.h" is the single point of access to SuperLU.
      - Configuration preprocessor variables are now defined either in Gmm or
        in GetFEM, exclusively, based on two manually maintained header file
        templates "gmm_arch_config.h.in" and "getfem_arch_config.h.in". The
        header file generated by autoheader is not used.
      - All configuration options are fixed at configure time in the generated
        headers "gmm_arch_config.h" and "getfem_arch_config.h", and cannot
        be overridden with the "-D" option on the command line
      - Both SuperLU and MUMPS are now treated equally in the configure script
      - At least one of the two solvers, MUMPS or SuperLU, is required in
        order to build GetFEM
      - All unit tests were made to run and pass with either SuperLU or MUMPS
      - Unit test "" now respects the lapack configuration of
        the project (as found in "gmm_arch_config.h")
---                                        |     3 +-                                       |   458 +-
 contrib/crack_plate/   |    17 +-
 contrib/icare/                             |    43 +-
 contrib/xfem_contact/                |     4 +
 .../                    |     9 +-
 interface/src/getfemint_precond.h                  |    12 +-
 interface/src/                       |     6 +-
 interface/src/                        |     4 +
 interface/src/gfi_array.h                          |     2 +-
 interface/src/octave/gfi_array.h                   |     2 +-
 interface/src/python/getfem_python.c               |    14 +-
 src/                                    |     2 -
 src/getfem/bgeot_config.h                          |     1 +
 src/getfem/                 |    49 +
 src/getfem/getfem_config.h                         |    36 +-
 src/getfem/getfem_model_solvers.h                  |    25 +-
 src/getfem/getfem_superlu.h                        |   130 -
 src/                              |   430 -
 src/gmm/                       |    25 +
 src/gmm/gmm_solver_Schwarz_additive.h              |    10 +-
 src/gmm/gmm_superlu_interface.h                    |     9 +-
 superlu/BLAS.c                                     | 43902 -------------------
 superlu/BLAS/License.txt                           |    14 -
 superlu/BLAS/caxpy.f                               |   102 -
 superlu/BLAS/ccopy.f                               |    94 -
 superlu/BLAS/cdotc.f                               |   103 -
 superlu/BLAS/cdotu.f                               |   100 -
 superlu/BLAS/cgbmv.f                               |   390 -
 superlu/BLAS/cgemm.f                               |   483 -
 superlu/BLAS/cgemv.f                               |   350 -
 superlu/BLAS/cgerc.f                               |   227 -
 superlu/BLAS/cgeru.f                               |   227 -
 superlu/BLAS/chbmv.f                               |   380 -
 superlu/BLAS/chemm.f                               |   371 -
 superlu/BLAS/chemv.f                               |   337 -
 superlu/BLAS/cher.f                                |   278 -
 superlu/BLAS/cher2.f                               |   317 -
 superlu/BLAS/cher2k.f                              |   442 -
 superlu/BLAS/cherk.f                               |   396 -
 superlu/BLAS/chpmv.f                               |   338 -
 superlu/BLAS/chpr.f                                |   279 -
 superlu/BLAS/chpr2.f                               |   318 -
 superlu/BLAS/crotg.f                               |    74 -
 superlu/BLAS/cscal.f                               |    91 -
 superlu/BLAS/csrot.f                               |   153 -
 superlu/BLAS/csscal.f                              |    94 -
 superlu/BLAS/cswap.f                               |    98 -
 superlu/BLAS/csymm.f                               |   369 -
 superlu/BLAS/csyr2k.f                              |   396 -
 superlu/BLAS/csyrk.f                               |   363 -
 superlu/BLAS/ctbmv.f                               |   429 -
 superlu/BLAS/ctbsv.f                               |   432 -
 superlu/BLAS/ctpmv.f                               |   388 -
 superlu/BLAS/ctpsv.f                               |   390 -
 superlu/BLAS/ctrmm.f                               |   452 -
 superlu/BLAS/ctrmv.f                               |   373 -
 superlu/BLAS/ctrsm.f                               |   477 -
 superlu/BLAS/ctrsv.f                               |   375 -
 superlu/BLAS/dasum.f                               |   111 -
 superlu/BLAS/daxpy.f                               |   115 -
 superlu/BLAS/dcabs1.f                              |    58 -
 superlu/BLAS/dcopy.f                               |   115 -
 superlu/BLAS/ddot.f                                |   117 -
 superlu/BLAS/dgbmv.f                               |   370 -
 superlu/BLAS/dgemm.f                               |   384 -
 superlu/BLAS/dgemv.f                               |   330 -
 superlu/BLAS/dger.f                                |   227 -
 superlu/BLAS/dnrm2.f                               |   112 -
 superlu/BLAS/drot.f                                |   101 -
 superlu/BLAS/drotg.f                               |    86 -
 superlu/BLAS/drotm.f                               |   202 -
 superlu/BLAS/drotmg.f                              |   251 -
 superlu/BLAS/dsbmv.f                               |   375 -
 superlu/BLAS/dscal.f                               |   110 -
 superlu/BLAS/dsdot.f                               |   172 -
 superlu/BLAS/dspmv.f                               |   331 -
 superlu/BLAS/dspr.f                                |   261 -
 superlu/BLAS/dspr2.f                               |   296 -
 superlu/BLAS/dswap.f                               |   122 -
 superlu/BLAS/dsymm.f                               |   367 -
 superlu/BLAS/dsymv.f                               |   333 -
 superlu/BLAS/dsyr.f                                |   263 -
 superlu/BLAS/dsyr2.f                               |   298 -
 superlu/BLAS/dsyr2k.f                              |   399 -
 superlu/BLAS/dsyrk.f                               |   364 -
 superlu/BLAS/dtbmv.f                               |   398 -
 superlu/BLAS/dtbsv.f                               |   401 -
 superlu/BLAS/dtpmv.f                               |   352 -
 superlu/BLAS/dtpsv.f                               |   354 -
 superlu/BLAS/dtrmm.f                               |   415 -
 superlu/BLAS/dtrmv.f                               |   342 -
 superlu/BLAS/dtrsm.f                               |   443 -
 superlu/BLAS/dtrsv.f                               |   338 -
 superlu/BLAS/dzasum.f                              |    98 -
 superlu/BLAS/dznrm2.f                              |   119 -
 superlu/BLAS/icamax.f                              |   107 -
 superlu/BLAS/idamax.f                              |   106 -
 superlu/BLAS/isamax.f                              |   106 -
 superlu/BLAS/izamax.f                              |   107 -
 superlu/BLAS/lsame.f                               |   125 -
 superlu/BLAS/sasum.f                               |   112 -
 superlu/BLAS/saxpy.f                               |   115 -
 superlu/BLAS/scabs1.f                              |    57 -
 superlu/BLAS/scasum.f                              |    97 -
 superlu/BLAS/scnrm2.f                              |   119 -
 superlu/BLAS/scopy.f                               |   115 -
 superlu/BLAS/sdot.f                                |   117 -
 superlu/BLAS/sdsdot.f                              |   255 -
 superlu/BLAS/sgbmv.f                               |   370 -
 superlu/BLAS/sgemm.f                               |   384 -
 superlu/BLAS/sgemv.f                               |   330 -
 superlu/BLAS/sger.f                                |   227 -
 superlu/BLAS/snrm2.f                               |   112 -
 superlu/BLAS/srot.f                                |   101 -
 superlu/BLAS/srotg.f                               |    86 -
 superlu/BLAS/srotm.f                               |   203 -
 superlu/BLAS/srotmg.f                              |   251 -
 superlu/BLAS/ssbmv.f                               |   375 -
 superlu/BLAS/sscal.f                               |   110 -
 superlu/BLAS/sspmv.f                               |   331 -
 superlu/BLAS/sspr.f                                |   261 -
 superlu/BLAS/sspr2.f                               |   296 -
 superlu/BLAS/sswap.f                               |   122 -
 superlu/BLAS/ssymm.f                               |   367 -
 superlu/BLAS/ssymv.f                               |   333 -
 superlu/BLAS/ssyr.f                                |   263 -
 superlu/BLAS/ssyr2.f                               |   298 -
 superlu/BLAS/ssyr2k.f                              |   399 -
 superlu/BLAS/ssyrk.f                               |   364 -
 superlu/BLAS/stbmv.f                               |   398 -
 superlu/BLAS/stbsv.f                               |   401 -
 superlu/BLAS/stpmv.f                               |   352 -
 superlu/BLAS/stpsv.f                               |   354 -
 superlu/BLAS/strmm.f                               |   415 -
 superlu/BLAS/strmv.f                               |   342 -
 superlu/BLAS/strsm.f                               |   443 -
 superlu/BLAS/strsv.f                               |   344 -
 superlu/BLAS/xerbla.f                              |    89 -
 superlu/BLAS/xerbla_array.f                        |   119 -
 superlu/BLAS/zaxpy.f                               |   102 -
 superlu/BLAS/zcopy.f                               |    94 -
 superlu/BLAS/zdotc.f                               |   103 -
 superlu/BLAS/zdotu.f                               |   100 -
 superlu/BLAS/zdrot.f                               |   153 -
 superlu/BLAS/zdscal.f                              |    94 -
 superlu/BLAS/zgbmv.f                               |   390 -
 superlu/BLAS/zgemm.f                               |   483 -
 superlu/BLAS/zgemv.f                               |   350 -
 superlu/BLAS/zgerc.f                               |   227 -
 superlu/BLAS/zgeru.f                               |   227 -
 superlu/BLAS/zhbmv.f                               |   380 -
 superlu/BLAS/zhemm.f                               |   371 -
 superlu/BLAS/zhemv.f                               |   337 -
 superlu/BLAS/zher.f                                |   278 -
 superlu/BLAS/zher2.f                               |   317 -
 superlu/BLAS/zher2k.f                              |   443 -
 superlu/BLAS/zherk.f                               |   396 -
 superlu/BLAS/zhpmv.f                               |   338 -
 superlu/BLAS/zhpr.f                                |   279 -
 superlu/BLAS/zhpr2.f                               |   318 -
 superlu/BLAS/zrotg.f                               |    75 -
 superlu/BLAS/zscal.f                               |    91 -
 superlu/BLAS/zswap.f                               |    98 -
 superlu/BLAS/zsymm.f                               |   369 -
 superlu/BLAS/zsyr2k.f                              |   396 -
 superlu/BLAS/zsyrk.f                               |   363 -
 superlu/BLAS/ztbmv.f                               |   429 -
 superlu/BLAS/ztbsv.f                               |   432 -
 superlu/BLAS/ztpmv.f                               |   388 -
 superlu/BLAS/ztpsv.f                               |   390 -
 superlu/BLAS/ztrmm.f                               |   452 -
 superlu/BLAS/ztrmv.f                               |   373 -
 superlu/BLAS/ztrsm.f                               |   477 -
 superlu/BLAS/ztrsv.f                               |   375 -
 superlu/BLAS_f2c.h                                 |   236 -
 superlu/License.txt                                |    30 -
 superlu/                                |   329 -
 superlu/ccolumn_bmod.c                             |   362 -
 superlu/ccolumn_dfs.c                              |   266 -
 superlu/ccopy_to_ucol.c                            |   112 -
 superlu/cgscon.c                                   |   155 -
 superlu/cgsequ.c                                   |   205 -
 superlu/cgsrfs.c                                   |   457 -
 superlu/cgssv.c                                    |   231 -
 superlu/cgssvx.c                                   |   627 -
 superlu/cgstrf.c                                   |   444 -
 superlu/cgstrs.c                                   |   344 -
 superlu/clacon.c                                   |   236 -
 superlu/clangs.c                                   |   132 -
 superlu/claqgs.c                                   |   160 -
 superlu/cmemory.c                                  |   691 -
 superlu/cmyblas2.c                                 |   204 -
 superlu/colamd.c                                   |  3412 --
 superlu/colamd.h                                   |   246 -
 superlu/cpanel_bmod.c                              |   478 -
 superlu/cpanel_dfs.c                               |   256 -
 superlu/cpivotL.c                                  |   171 -
 superlu/cpivotgrowth.c                             |   130 -
 superlu/cpruneL.c                                  |   156 -
 superlu/creadhb.c                                  |   288 -
 superlu/csnode_bmod.c                              |   117 -
 superlu/csnode_dfs.c                               |   113 -
 superlu/csp_blas2.c                                |   577 -
 superlu/csp_blas3.c                                |   141 -
 superlu/cutil.c                                    |   482 -
 superlu/dcolumn_bmod.c                             |   354 -
 superlu/dcolumn_dfs.c                              |   267 -
 superlu/dcomplex.c                                 |   116 -
 superlu/dcopy_to_ucol.c                            |   112 -
 superlu/dgscon.c                                   |   156 -
 superlu/dgsequ.c                                   |   206 -
 superlu/dgsrfs.c                                   |   447 -
 superlu/dgssv.c                                    |   231 -
 superlu/dgssvx.c                                   |   626 -
 superlu/dgstrf.c                                   |   441 -
 superlu/dgstrs.c                                   |   330 -
 superlu/dgstrsL.c                                  |   230 -
 superlu/dlacon.c                                   |   250 -
 superlu/dlamch.c                                   |  1004 -
 superlu/dlangs.c                                   |   132 -
 superlu/dlaqgs.c                                   |   158 -
 superlu/dmemory.c                                  |   690 -
 superlu/dmyblas2.c                                 |   246 -
 superlu/dpanel_bmod.c                              |   449 -
 superlu/dpanel_dfs.c                               |   256 -
 superlu/dpivotL.c                                  |   170 -
 superlu/dpivotgrowth.c                             |   129 -
 superlu/dpruneL.c                                  |   156 -
 superlu/dreadhb.c                                  |   277 -
 superlu/dsnode_bmod.c                              |   114 -
 superlu/dsnode_dfs.c                               |   113 -
 superlu/dsp_blas2.c                                |   498 -
 superlu/dsp_blas3.c                                |   141 -
 superlu/dutil.c                                    |   479 -
 superlu/dzsum1.c                                   |   102 -
 superlu/f2c_lite.c                                 |   391 -
 superlu/get_perm_c.c                               |   472 -
 superlu/heap_relax_snode.c                         |   113 -
 superlu/icmax1.c                                   |   124 -
 superlu/izmax1.c                                   |   117 -
 superlu/lsame.c                                    |   111 -
 superlu/memory.c                                   |   230 -
 superlu/mmd.c                                      |  1021 -
 superlu/relax_snode.c                              |    80 -
 superlu/scolumn_bmod.c                             |   360 -
 superlu/scolumn_dfs.c                              |   278 -
 superlu/scomplex.c                                 |   127 -
 superlu/scopy_to_ucol.c                            |   112 -
 superlu/scsum1.c                                   |   111 -
 superlu/sgscon.c                                   |   155 -
 superlu/sgsequ.c                                   |   205 -
 superlu/sgsrfs.c                                   |   446 -
 superlu/sgssv.c                                    |   230 -
 superlu/sgssvx.c                                   |   623 -
 superlu/sgstrf.c                                   |   431 -
 superlu/sgstrs.c                                   |   331 -
 superlu/slacon.c                                   |   249 -
 superlu/slamch.c                                   |  1023 -
 superlu/slangs.c                                   |   131 -
 superlu/slaqgs.c                                   |   157 -
 superlu/slu_Cnames.h                               |   356 -
 superlu/slu_cdefs.h                                |   246 -
 superlu/slu_dcomplex.h                             |    93 -
 superlu/slu_ddefs.h                                |   243 -
 superlu/slu_scomplex.h                             |    93 -
 superlu/slu_sdefs.h                                |   243 -
 superlu/slu_util.h                                 |   287 -
 superlu/slu_zdefs.h                                |   246 -
 superlu/smemory.c                                  |   689 -
 superlu/smyblas2.c                                 |   245 -
 superlu/sp_coletree.c                              |   354 -
 superlu/sp_ienv.c                                  |    86 -
 superlu/sp_preorder.c                              |   224 -
 superlu/spanel_bmod.c                              |   462 -
 superlu/spanel_dfs.c                               |   256 -
 superlu/spivotL.c                                  |   182 -
 superlu/spivotgrowth.c                             |   129 -
 superlu/spruneL.c                                  |   156 -
 superlu/sreadhb.c                                  |   276 -
 superlu/ssnode_bmod.c                              |   115 -
 superlu/ssnode_dfs.c                               |   113 -
 superlu/ssp_blas2.c                                |   481 -
 superlu/ssp_blas3.c                                |   140 -
 superlu/superlu_timer.c                            |    76 -
 superlu/supermatrix.h                              |   165 -
 superlu/sutil.c                                    |   478 -
 superlu/util.c                                     |   405 -
 superlu/xerbla.c                                   |    83 -
 superlu/zcolumn_bmod.c                             |   363 -
 superlu/zcolumn_dfs.c                              |   266 -
 superlu/zcopy_to_ucol.c                            |   112 -
 superlu/zgscon.c                                   |   152 -
 superlu/zgsequ.c                                   |   205 -
 superlu/zgsrfs.c                                   |   456 -
 superlu/zgssv.c                                    |   230 -
 superlu/zgssvx.c                                   |   623 -
 superlu/zgstrf.c                                   |   432 -
 superlu/zgstrs.c                                   |   344 -
 superlu/zlacon.c                                   |   236 -
 superlu/zlangs.c                                   |   131 -
 superlu/zlaqgs.c                                   |   159 -
 superlu/zmemory.c                                  |   689 -
 superlu/zmyblas2.c                                 |   203 -
 superlu/zpanel_bmod.c                              |   477 -
 superlu/zpanel_dfs.c                               |   256 -
 superlu/zpivotL.c                                  |   171 -
 superlu/zpivotgrowth.c                             |   129 -
 superlu/zpruneL.c                                  |   156 -
 superlu/zreadhb.c                                  |   286 -
 superlu/zsnode_bmod.c                              |   129 -
 superlu/zsnode_dfs.c                               |   113 -
 superlu/zsp_blas2.c                                |   576 -
 superlu/zsp_blas3.c                                |   140 -
 superlu/zutil.c                                    |   482 -
 tests/                                 |     3 +-
 tests/                             |    36 +-
 tests/                          |    78 +-
 tests/                         |    16 +-
 319 files changed, 494 insertions(+), 128462 deletions(-)

diff --git a/ b/
index 95b0d912..8049fdc3 100644
--- a/
+++ b/
@@ -18,11 +18,12 @@
-SUBDIRS = m4 cubature @SUPERLU_SRC@ src tests interface contrib bin doc
+SUBDIRS = m4 cubature src tests interface contrib bin doc
 CLEANFILES = so_locations _configs.sed
 # DISTCHECK_CONFIGURE_FLAGS = --enable-matlab --enable-octave
diff --git a/ b/
index 4fcf6c1f..8538453a 100644
--- a/
+++ b/
@@ -31,15 +31,15 @@ AC_INIT(getfem, 5.4.2)
src/getfem/getfem_arch_config.h src/gmm/gmm_arch_config.h])
@@ -75,7 +75,7 @@ AC_ARG_ENABLE(paralevel,
 if test $paralevel -ge 1; then
+  AC_DEFINE_UNQUOTED(GETFEM_PARA_LEVEL,$paralevel,[Parallelization level 
 dnl ---------------------------END OF PARA LEVEL-------------------
@@ -87,12 +87,13 @@ dnl -----------------------------------------------
-AX_PROG_CXX_MPI([test $paralevel -ge 1],[usempi=yes],[usempi=no])
-AX_PROG_CC_MPI([test "x$usempi" = "xyes"],,[usempi=no])
-AX_PROG_FC_MPI([test "x$usempi" = "xyes"],[CPPFLAGS="$CPPFLAGS 
+AX_PROG_CXX_MPI(  [test $paralevel -ge 1],[usempi=yes],[usempi=no])
+AX_PROG_CC_MPI([test "x$usempi" = "xyes"],[usempi=yes],[usempi=no])
+AX_PROG_FC_MPI([test "x$usempi" = "xyes"],[usempi=yes],[usempi=no])
 if  test "x$usempi" = "xyes"; then
-AC_CHECK_LIB(mpi_cxx, _init,,)
+AC_CHECK_LIB(mpi_cxx, _init,
+             [AC_DEFINE(GMM_USES_MPI,,[defined if GMM uses MPI])],)
@@ -311,7 +312,7 @@ if test x$use_rpc = xyes; then
-  AC_DEFINE_UNQUOTED(USE_RPC, 1, [Use rpc for getfem communication with 
+  AC_DEFINE(GETFEM_USE_RPC,,[Use rpc for getfem communication with matlab])
 AM_CONDITIONAL(BUILDMEXRPC, test x$matlab_rpc = xyes)
@@ -654,7 +655,7 @@ fi
 dnl ACX_BLAS([ echo "OK, You have working BLAS libs !"; HAVE_VENDOR_BLAS=1 
],[echo "YOU DONT HAVE BLAS! Using a cheap replacement" ; HAVE_VENDOR_BLAS=0])
+AC_DEFINE(GMM_USES_BLAS,,[defined if GMM is linked to a blas library])
@@ -684,7 +685,7 @@ if test x$useblas64support = xNO && test x$usematlab = 
xYES; then
 if test x$useblas64support = xYES; then
-  AC_DEFINE_UNQUOTED(USE_BLAS64_INTERFACE, 1, [Use blas with 64 bits integers])
+  AC_DEFINE(GMM_USE_BLAS64_INTERFACE,,[Use blas with 64 bits integers])
 dnl ------------------------------LAPACK TEST--------------------------------
@@ -704,8 +705,8 @@ if test x"$acx_blas_ok" = xyes; then
   if test x"$acx_lapack_ok" = xyes; then
+    AC_DEFINE(GMM_USES_LAPACK,,[defined if GMM is linked to a lapack library])
@@ -727,8 +728,8 @@ AC_ARG_ENABLE(multithread-blas,
 if test x$multithread_blas = xNO; then
   AC_CHECK_LIB(dl, dlsym, [acx_dl_ok=yes; DL_LIBS="-ldl"])
   if test x"$acx_dl_ok" = xyes; then
-     AC_DEFINE_UNQUOTED([FORCE_SINGLE_THREAD_BLAS],1,[enable openblas to be 
+    AC_DEFINE(GETFEM_FORCE_SINGLE_THREAD_BLAS,,[enable openblas to be 
 dnl --------------------------end of multithread-blas--------------------
@@ -754,7 +755,7 @@ if test x$useopenmp = xYES; then
   if test "x$ac_cv_prog_cxx_openmp" != "xunsupported" && test 
"x$ac_cv_prog_cxx_openmp" != "x"; then
+    AC_DEFINE(GETFEM_HAS_OPENMP,,[defined if GetFEM is built with OpenMP 
     AC_MSG_ERROR([OpenMP support not found. Use --enable-openmp=no flag to 
compile GetFEM without OpenMP]);
@@ -762,92 +763,215 @@ fi;
 dnl ---------------------------END OF OPENMP-----------------------
-dnl ------------------------------SuperLU config-------------------------
+dnl ------------------------------SUPERLU TEST---------------------------
- [AS_HELP_STRING([--enable-superlu],[turn on/off SuperLU support])],
- [case "${enableval}" in
-   yes) usesuperlu=YES ;;
-   no)  usesuperlu=NO ;;
-   *) AC_MSG_ERROR([bad value ${enableval} for --enable-superlu]) ;;
- esac],[usesuperlu=YES])
-if test x$usesuperlu = xYES; then
-  echo "Building with SuperLU support (use --enable-superlu=no to disable it)"
-  if test x"$FC" = "x"; then
-    sgemm="sgemm_"
+  [AS_HELP_STRING([--enable-superlu], [Enable SuperLU support])],
+  [require_superlu=$enableval],
+  [require_superlu="auto"])
+# the user can override these defaults using --with-superlu=
+ [AS_HELP_STRING([--with-superlu=<lib>],[use SuperLU library <lib>])],
+ [case $with_superlu in
+   yes | "")
+     if test "x$require_superlu" = "xno"; then
+       AC_MSG_ERROR([Contradicting arguments between --enable-superlu and 
+     elif test "x$require_superlu" = "xauto"; then
+       require_superlu="yes"
+     fi;;
+   no)
+     if test "x$require_superlu" = "xyes"; then
+       AC_MSG_ERROR([Contradicting arguments between --enable-superlu and 
+     elif test "x$require_superlu" = "xauto"; then
+       require_superlu="no"
+     fi;;
+   -* | */* | *.a | *.so | *.so.* | *.o| builtin) 
+   *) SUPERLU_LIBS=`echo $with_superlu | sed -e 's/^/-l/g;s/ \+/ -l/g'`;;
+  esac]
+ [AS_HELP_STRING([--with-superlu-include-dir],[directory in which the 
superlu/sl*.h headers can be found])],
+ [ if test x$require_superlu = xno; then
+     AC_MSG_ERROR([Inconsistent options for --enable-superlu, --with-superlu 
and --with-superlu-include-dir.]);
+   else
+     require_superlu="yes"
+     case $withval in
+       -I* ) SUPERLUINC="$withval";;
+       * ) SUPERLUINC="-I$withval";;
+     esac
+   fi;],
+if test "x$require_superlu" = "xno"; then
+  found_superlu="no"
+  echo "Building with SuperLU explicitly disabled";
+    [superlu/slu_Cnames.h superlu/slu_cdefs.h superlu/slu_ddefs.h 
superlu/slu_sdefs.h superlu/slu_zdefs.h \
+     superlu/slu_dcomplex.h superlu/slu_scomplex.h],
+    [found_superlu="yes"],
+    [ if test "x$require_superlu" = "xyes"; then
+        AC_MSG_ERROR([Header files of SuperLU not found.]);
+      else
+        found_superlu="no"
+      fi;
+    ])
+  if test x$found_superlu = xyes; then
+    save_LIBS="$LIBS";
+    AC_CHECK_LIB([superlu], [dCreate_CompCol_Matrix],
+                 [AC_DEFINE(GMM_USES_SUPERLU,,[defined if GMM is linked to the 
superlu library])],
+                 [if test "x$require_superlu" = "xyes"; then
+                    AC_MSG_ERROR([SuperLU library not found]);
+                  else
+                    found_superlu="no"
+                  fi;])
+    if test "x$found_superlu" = "xyes"; then
+      echo "Building with SuperLU (use --enable-superlu=no to disable it)"
+      LIBS="$SUPERLU_LIBS $save_LIBS"
+    else
+      SUPERLU_LIBS=""
+      LIBS="$save_LIBS"
+    fi
+  elif test "x$require_superlu" = "xyes"; then
+    AC_MSG_ERROR([SuperLU header files not found but required by the user. 
Aborting configure...]);
-    AC_FC_FUNC(sgemm)
-    echo "FC=$FC"
+    echo "SuperLU header files not found, building without SuperLU"
-  case $sgemm in
-    sgemm)
-          F77_CALL_C="NOCHANGE";
-          ;;
-    sgemm_)
-          F77_CALL_C="ADD_";
-          ;;
-    SGEMM)
-          F77_CALL_C="UPCASE";
-          ;;
-    sgemm__)
-          F77_CALL_C="ADD__";
-          ;;
-    *)
-          AC_MSG_ERROR(["superlu won't handle this calling convention: sgemm 
-> $sgemm"])
-          ;;
-  esac
-  SUPERLU_SRC="superlu"
-  case $host in
-    *apple*)
-        ;;
-    *mingw*)
-        SUPERLU_LIBS="../$SUPERLU_SRC/.libs/libsuperlu.a"
-        ;;
-    *)
-        SUPERLU_LIBS="`readlink -f .`/$SUPERLU_SRC/"
-        ;;
-  esac
-  echo "Building without SuperLU support (use --enable-superlu=yes to enable 
-  AC_CHECK_LIB([superlu], [dCreate_CompCol_Matrix],[],
-               [AC_MSG_ERROR([SuperLU library not found])])
-  [superlu/colamd.h superlu/slu_Cnames.h \
-   superlu/slu_cdefs.h superlu/slu_ddefs.h superlu/slu_sdefs.h 
superlu/slu_zdefs.h \
-   superlu/slu_dcomplex.h superlu/slu_scomplex.h],
-  [usesuperlu="YES"],
-  [
-    if test "x$usesuperlu" = "xYES"; then
-      AC_MSG_ERROR([header files of superlu not found. Use 
--enable-superlu=yes flag]);
+AM_CONDITIONAL(SUPERLU, test x$found_superlu = xyes)
+if test "x$found_superlu" = "xyes"; then
+  echo "Configuration of SuperLU done"
+dnl ---------------------------END OF SUPERLU TEST-----------------------
+dnl ------------------------------MUMPS TEST-----------------------------
+  [AS_HELP_STRING([--enable-mumps], [Enable MUMPS support])],
+  [require_mumps=$enableval],
+  [require_mumps="auto"])
+# the user can override these defaults using --with-mumps=
+if test $paralevel -le 1; then # default to the typical naming of the 
sequential libraries
+  MUMPS_LIBS="-lsmumps_seq -ldmumps_seq -lcmumps_seq -lzmumps_seq"
+else # default to the common name for the parallel libraries (the user can 
override this using --with-mumps=)
+  MUMPS_LIBS="-lsmumps -ldmumps -lcmumps -lzmumps"
+ [AS_HELP_STRING([--with-mumps=<lib>],[use MUMPS library <lib>])],
+ [case $with_mumps in
+   yes | "")
+     if test "x$require_mumps" = "xno"; then
+       AC_MSG_ERROR([Contradicting arguments between --enable-mumps and 
+     elif test "x$require_mumps" = "xauto"; then
+       require_mumps="yes"
+     fi;;
+   no)
+     if test "x$require_mumps" = "xyes"; then
+       AC_MSG_ERROR([Contradicting arguments between --enable-(par-)mumps and 
+     elif test "x$require_mumps" = "xauto"; then
+       require_mumps="no"
+     fi;;
+   -* | */* | *.a | *.so | *.so.* | *.o| builtin) MUMPS_LIBS="$with_mumps";;
+   *) MUMPS_LIBS=`echo $with_mumps | sed -e 's/^/-l/g;s/ \+/ -l/g'`;;
+  esac]
+ [AS_HELP_STRING([--with-mumps-include-dir],[directory in which the dmumps.h 
header can be found])],
+ [case $withval in
+   -I* ) MUMPSINC="$withval";;
+   * ) MUMPSINC="-I$withval";;
+  esac],
+if test "x$require_mumps" = "xno"; then
+  found_mumps="no"
+  echo "Building with MUMPS explicitly disabled";
+ AC_SEARCH_LIBS(smumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([The function smumps_c couldn't be found in the provided 
MUMPS libraries.]);
-  ])
+    found_mumps="no"]
+ )
+ AC_SEARCH_LIBS(dmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([The function dmumps_c couldn't be found in the provided 
MUMPS libraries.]);
+    fi;
+    found_mumps="no"]
+ )
+ AC_SEARCH_LIBS(cmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([The function cmumps_c couldn't be found in the provided 
MUMPS libraries.]);
+    fi;
+    found_mumps="no"]
+ )
+ AC_SEARCH_LIBS(zmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([The function zmumps_c couldn't be found in the provided 
MUMPS libraries.]);
+    fi;
+    found_mumps="no"]
+ )
+ AC_CHECK_HEADERS([smumps_c.h dmumps_c.h cmumps_c.h zmumps_c.h],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([header file dmumps_c.h not found.]);
+    fi;
+    found_mumps="no"]
+ )
-  SUPERLU_LIBS="-lsuperlu"
+ if test "x$found_mumps" = "xyes"; then
+   echo "Building with MUMPS (use --enable-mumps=no to disable it)"
+   AC_DEFINE(GMM_USES_MUMPS,,[defined if GMM is linked to the mumps library])
+ else
+   LIBS="$save_LIBS"
+ fi;
+AM_CONDITIONAL(MUMPS, test x$found_mumps = xyes)
+if test "x$found_mumps" = "xyes"; then
+  echo "Configuration of MUMPS done"
+dnl ---------------------------END OF MUMPS TEST--------------------------
+if test "x$found_superlu" = "xno" -a "x$found_mumps" = "xno"; then
+  AC_MSG_ERROR([Neither MUMPS nor SuperLU was enabled. At least one linear 
solver is required.])
+  exit 1
-echo "Configuration of SuperLU done"
-dnl ----------------EXPERIMENTAL PARTS OF THE LIBRARY--------------------
+dnl ----------------EXPERIMENTAL PARTS OF THE LIBRARY---------------------
         [AS_HELP_STRING([--enable-experimental],[compile experimental parts of 
the library])],
 [ if   test "x$enableval" = "xyes" ; then EXPER="-DEXPERIMENTAL_PURPOSE_ONLY"; 
fi], [EXPER=""])
+dnl ----------------END OF EXPERIMENTAL PARTS OF THE LIBRARY-------------
 dnl -----------------------------QD TESTS--------------------------------
@@ -891,10 +1015,10 @@ int main() {
   fpu_fix_end(&old_cw); return 1-ok;
   ]])],[echo "checking if qd library is working...yes"],[ echo "QD library is 
not working (check config.log)"; exit 1],[])
-  AC_DEFINE_UNQUOTED([HAVE_QDLIB],1,[defined if the qd library was found and 
is working])
+  AC_DEFINE(GETFEM_HAVE_QDLIB,,[defined if the qd library was found and is 
   if test "x$QD_PREC" = "xquad"; then
-    AC_DEFINE_UNQUOTED([QDLIB_USE_QUAD],1,[defined if quad-doubles are to be 
used instead of double-double])
+    AC_DEFINE(GETFEM_QDLIB_USE_QUAD,,[defined if quad-doubles are to be used 
instead of double-double])
 dnl -----------------------------END QD TESTS--------------------------------
@@ -914,7 +1038,9 @@ else
-  AC_CHECK_HEADERS(libqhull_r/qhull_ra.h,[useQHULL="yes"],
+  AC_CHECK_HEADERS(libqhull_r/qhull_ra.h,
+                   [useQHULL="yes"; 
+                                              [defined if the 
<libqhull_r/qhull_ra.h> header file is available])],
     if test "x$useQHULL" = "xyes"; then
       AC_MSG_ERROR([header files libqhull_r/qhull_ra.h not found. Use 
--enable-qhull=no flag]);
@@ -930,116 +1056,6 @@ AC_SUBST([QHULL_LIBS])
 echo "Configuration of qhull done"
 dnl -----------------------------END OF QHULL TEST---------------------------
-dnl ------------------------------MUMPS TEST------------------------------
- [AS_HELP_STRING([--with-mumps-include-dir],[directory in which the dmumps.h 
header can be found])],
- [case $withval in
-   -I* ) MUMPSINC="$withval";;
-   * ) MUMPSINC="-I$withval";;
-  esac],
- [MUMPSINC="-I$GFPREFIX/include"]
-case $host in
-    *mingw*)
-        MUMPS_SEQ_LIBS="-lsmumps -ldmumps -lcmumps -lzmumps -lmumps_common 
-lmpiseq -lpord"
-        ;;
-    *apple*)
-        MUMPS_SEQ_LIBS="-lsmumps -ldmumps -lcmumps -lzmumps -lmumps_common 
-lmpiseq -lpord -lgomp"
-        ;;
-    *)
-        MUMPS_SEQ_LIBS="-lsmumps_seq -ldmumps_seq -lcmumps_seq -lzmumps_seq"
-        ;;
- [AS_HELP_STRING([--enable-mumps],[enable the use of the (sequential) MUMPS 
library. A direct solver for large sparse linear systems.])],
- [case $enableval in
-   yes | "") usemumps="yes"; acx_mumps_ok="yes"; MUMPS_LIBS="$MUMPS_SEQ_LIBS";;
-   no) usemumps="no";;
-  esac],
- [usemumps="test"; acx_mumps_ok="test"; MUMPS_LIBS="$MUMPS_SEQ_LIBS"]
- [AS_HELP_STRING([--enable-par-mumps],[enable the use of the parrallel MUMPS 
library. A direct solver for large sparse linear systems.])],
- [case $enableval in
-   yes | "") usemumps="yes"; MUMPS_LIBS="-lsmumps -ldmumps -lcmumps -lzmumps";;
-   no) usemumps="no";;
-  esac],
- [if test $paralevel -ge 1; then
-    usemumps="test"; acx_mumps_ok="test"; MUMPS_LIBS="-lsmumps -ldmumps 
-lcmumps -lzmumps"
-  fi;]
- [AS_HELP_STRING([--with-mumps=<lib>],[use MUMPS library <lib>])],
- [case $with_mumps in
-   yes | "") usemumps="yes";;
-   no) acx_mumps_ok="no" ;;
-   -* | */* | *.a | *.so | *.so.* | *.o| builtin) MUMPS_LIBS="$with_mumps"; 
acx_mumps_ok="yes" ;;
-   *) MUMPS_LIBS=`echo $with_mumps | sed -e 's/^/-l/g;s/ \+/ -l/g'` ; 
-  esac]
-if test "x$usemumps" = "xno" -o "x$acx_mumps_ok" = "xno"; then
-  echo "Building with MUMPS explicitly disabled";
- AC_SEARCH_LIBS(smumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([The function smumps_c couldn't be found in the provided 
MUMPS libraries.]);
-    fi;
-    usemumps="no"]
- )
- AC_SEARCH_LIBS(dmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([The function dmumps_c couldn't be found in the provided 
MUMPS libraries.]);
-    fi;
-    usemumps="no"]
- )
- AC_SEARCH_LIBS(cmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([The function cmumps_c couldn't be found in the provided 
MUMPS libraries.]);
-    fi;
-    usemumps="no"]
- )
- AC_SEARCH_LIBS(zmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([The function zmumps_c couldn't be found in the provided 
MUMPS libraries.]);
-    fi;
-    usemumps="no"]
- )
- AC_CHECK_HEADERS([smumps_c.h dmumps_c.h cmumps_c.h zmumps_c.h],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([header file dmumps_c.h not found.]);
-    fi;
-    usemumps="no"]
- )
- if test "x$usemumps" = "xyes"; then
-   echo "Building with MUMPS (use --enable-mumps=no to disable it)"
- else
-   LIBS="$save_LIBS"
- fi;
-AM_CONDITIONAL(MUMPS, test x$usemumps = xyes)
-echo "Configuration of MUMPS done"
-dnl ---------------------------END OF MUMPS TEST--------------------------
 dnl ---------------------------METIS--------------------------
@@ -1068,10 +1084,10 @@ else
-    AC_DEFINE_UNQUOTED([HAVE_METIS],1,[defined if the Metis library was found 
and is working])
+    AC_DEFINE(GETFEM_HAVE_METIS,,[defined if the Metis library was found and 
is working])
     AC_CHECK_LIB(metis, METIS_SetDefaultOptions, [usemetisnew="yes"],
-                                     [defined if the Metis library found is 
older than version 4])
+                            [defined if the Metis library found is older than 
version 4])
     echo "Building with METIS (use --enable-metis=no to disable it)"
     if test "x$usemetisnew" = "xyes"; then
@@ -1091,8 +1107,9 @@ AC_SUBST([METIS_LIBS])
 dnl ---------------------------END OF METIS--------------------------
+                 AC_DEFINE(GETFEM_HAVE_CXXABI_H,,[defined if the cxxabi.h 
header file is available]))
 dnl ---------------------------- CHECK FOR __PRETTY_FUNCTION__ MACRO --------
 AC_CACHE_CHECK([for __PRETTY_FUNCTION__], ac_cv_have_pretty_function, [
@@ -1100,7 +1117,7 @@ AC_CACHE_CHECK([for __PRETTY_FUNCTION__], 
ac_cv_have_pretty_function, [
                 [ ac_cv_have_pretty_function="yes" ],
                 [ ac_cv_have_pretty_function=="no"  ])])
 if test "x$ac_cv_have_pretty_function" = "xyes"; then
@@ -1112,7 +1129,7 @@ AC_CACHE_CHECK([for execinfo.h and backtrace], 
ac_cv_have_backtrace, [
                 [ ac_cv_have_backtrace="yes" ],
                 [ ac_cv_have_backtrace="no"  ])])
 if test "x$ac_cv_have_backtrace" = "xyes"; then
-        AC_DEFINE_UNQUOTED(HAVE_BACKTRACE,1,[glibc backtrace function])
+        AC_DEFINE(GETFEM_HAVE_BACKTRACE,,[glibc backtrace function])
 dnl ---------------------------- CHECK FOR feenableexcept -----
@@ -1123,7 +1140,7 @@ AC_CACHE_CHECK([for fenv.h and feenableexcept], 
ac_cv_have_feenableexcept, [
                 [ ac_cv_have_feenableexcept="yes" ],
                 [ ac_cv_have_feenableexcept="no"  ])])
 if test "x$ac_cv_have_feenableexcept" = "xyes"; then
-        AC_DEFINE_UNQUOTED(HAVE_FEENABLEEXCEPT,1,[glibc floating point 
exceptions control])
+        AC_DEFINE(GETFEM_HAVE_FEENABLEEXCEPT,,[glibc floating point exceptions 
@@ -1163,7 +1180,6 @@ AC_CONFIG_FILES(                                          
 Makefile                                                                \
 m4/Makefile                                                             \
 cubature/Makefile                                                       \
-$SUPERLU_MAKEFILE                                                       \
 doc/Makefile                                                            \
 doc/sphinx/Makefile                                                     \
 src/Makefile                                                            \
@@ -1245,10 +1261,24 @@ else
   echo "- Qhull not found. Mesh generation will be disabled."
-if test "x$usemumps" = "xyes"; then
+if test "x$found_superlu" = "xyes"; then
+  echo "- SuperLU found. A direct solver for large sparse linear systems."
+  if test "x$require_superlu" = "xno"; then
+    echo "- Not using the SuperLU library for large sparse linear systems."
+  else
+    echo "- SuperLU not found. Not using the SuperLU library for large sparse 
linear systems."
+  fi
+if test "x$found_mumps" = "xyes"; then
   echo "- Mumps found. A direct solver for large sparse linear systems."
-  echo "- Mumps not found. Not using the MUMPS library for large sparse linear 
+  if test "x$require_mumps" = "xno"; then
+    echo "- Not using the MUMPS library for large sparse linear systems."
+  else
+    echo "- Mumps not found. Not using the MUMPS library for large sparse 
linear systems."
+  fi
 if test x"$acx_lapack_ok" = xyes; then
diff --git a/contrib/crack_plate/ 
index f258a788..13ad0a13 100644
--- a/contrib/crack_plate/
+++ b/contrib/crack_plate/
@@ -24,8 +24,6 @@
 #include "getfem/getfem_assembling.h" /* import assembly methods (and norms 
comp.) */
 #include "getfem/getfem_fourth_order.h"
 #include "getfem/getfem_model_solvers.h"
-#include "getfem/getfem_superlu.h"
 using std::endl; using std::cout; using std::cerr;
 using std::ends; using std::cin;
 template <typename T> std::ostream &operator <<
@@ -1084,8 +1082,12 @@ bool bilaplacian_crack_problem::solve(plain_vector &U) {
     gmm::scale(b, -1.);
     plain_vector X(b);
     scalar_type condest;
-    SuperLU_solve(A, X, gmm::scaled(b, scalar_type(-1)), condest, 1);
+#if defined(GMM_USES_MUMPS)
+    gmm::MUMPS_solve(A, X, gmm::scaled(b, scalar_type(-1)));
+    gmm::SuperLU_solve(A, X, gmm::scaled(b, scalar_type(-1)), condest, 1);
     cout << "cond super LU = " << 1./condest << "\n";
     cout << "X = " << gmm::sub_vector(X, gmm::sub_interval(0, 10)) << "\n";
     cout << "U = " << gmm::sub_vector(U, gmm::sub_interval(0, 10)) << "\n";
@@ -1159,10 +1161,15 @@ bool bilaplacian_crack_problem::solve(plain_vector &U) {
       b2[ind_sing[i]] = 0.;
-    SuperLU_solve(A, X1, b1, condest, 1);
+#if defined(GMM_USES_MUMPS)
+    gmm::MUMPS_solve(A, X1, b1);
+    gmm::MUMPS_solve(A, X2, b2);
+    gmm::SuperLU_solve(A, X1, b1, condest, 1);
     cout << "solving for s1 OK, cond = " << 1./condest << "\n";
-    SuperLU_solve(A, X2, b2, condest, 1);
+    gmm::SuperLU_solve(A, X2, b2, condest, 1);
     cout << "solving for s2 OK, cond = " << 1./condest << "\n";
     cout << "X1[ind_sing[0]] = " << X1[ind_sing[0]] << "\n";
     cout << "X1 = " << gmm::sub_vector(X1, gmm::sub_interval(0, 10)) << "\n";
     scalar_type max1 = 0., max2 = 0.;
diff --git a/contrib/icare/ b/contrib/icare/
index 24d05b54..10846366 100644
--- a/contrib/icare/
+++ b/contrib/icare/
@@ -56,7 +56,6 @@ enum {
-#ifdef GMM_USES_MPI
   template <typename VECT> inline void MPI_SUM_VECTOR2(VECT &V) {
     typedef typename gmm::linalg_traits<VECT>::value_type T;
     std::vector<T> W(gmm::vect_size(V));
@@ -65,7 +64,8 @@ enum {
                   MPI_SUM, MPI_COMM_WORLD);
     gmm::copy(W, V);
+  template <typename VECT> inline void MPI_SUM_VECTOR2(VECT &) {}
 struct problem_definition;
@@ -994,8 +994,10 @@ void navier_stokes_problem::solve_PREDICTION_CORRECTION2() 
   K2(nbdof_p,nbdof_p) = 0.0;
-  gmm::SuperLU_factor<double> SLUsys2, SLUsys3,SLUsys1;
+#if !defined(GMM_USES_MUMPS)
+  gmm::SuperLU_factor<double> SLUsys2;
 // dynamic problem
@@ -1139,6 +1141,9 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
+#if !defined(GMM_USES_MUMPS)
+  gmm::SuperLU_factor<double> SLUsys3;
   for (scalar_type t = Tinitial + dt; t <= T; t += dt) {
     //******* Construction of the Matrix for the 3rd system (to obtain 
velocity) **********//
@@ -1173,7 +1178,9 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       //cout <<"A2 "<< gmm::mat_norminf(A2v) << endl;
+#if !defined(GMM_USES_MUMPS)
     if (time_order == 2 && t == Tinitial+2*dt) { // computed once
@@ -1204,7 +1211,9 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
+#if !defined(GMM_USES_MUMPS)
     //******* The matrix of the 3rd system is constructed and factorized
@@ -1262,9 +1271,7 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       gmm::add(gmm::scaled(Un0, 2.0/dt),gmm::scaled(Unm1,-0.5/dt),Ytmp);
       gmm::mult(M, Ytmp, gmm::sub_vector(Y, I1));
     // Volumic source term -- inutile d'assambler car F = 0
     //pdef->source_term(*this, t, F);
@@ -1304,18 +1311,14 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       plain_vector VV(mf_mult.nb_dof());
       getfem::asm_source_term(VV, mim, mf_mult, mf_rhs, F, mpidirrg); // a 
optimiser indept time
       gmm::copy(gmm::sub_vector(VV, SUB_CT_DIR), gmm::sub_vector(Y, I3));
       plain_vector VV(mf_mult.nb_dof());
       getfem::asm_source_term(VV, mim, mf_mult, mf_rhs, F, mpidircylrg); // a 
optimiser  indept time
       gmm::copy(gmm::sub_vector(VV, SUB_CT_DIR_CYL), gmm::sub_vector(Y, I3C));
@@ -1337,9 +1340,7 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       } else {
          asm_improved_non_reflective_bc(VV, mim, mf_u, Un0, mf_mult, dt, nu, 
       gmm::copy(gmm::sub_vector(VV, SUB_CT_NONREF), gmm::sub_vector(Y, I4));
@@ -1347,9 +1348,7 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
     gmm::mult(gmm::transposed(B), gmm::scaled(Pn0, -1.0), YY);
     gmm::add(YY, gmm::sub_vector(Y, I1));
@@ -1357,9 +1356,8 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
     gmm::mult(gmm::transposed(Bbc), Pn0, YY);
     gmm::add(YY, gmm::sub_vector(Y, I1));
@@ -1381,11 +1379,11 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
-#elif (GETFEM_PARA_LEVEL==1 && defined(GMM_USES_MUMPS))
-      MUMPS_solve(A1,X,Y);
+#elif defined(GMM_USES_MUMPS)
+      gmm::MUMPS_solve(A1,X,Y);
       //#elif (GETFEM_PARA_LEVEL==0 && GMM_USES_MUMPS)
       // SuperLU_solve(A1, X, Y, rcond);
@@ -1395,6 +1393,7 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       //SuperLU_solve(A1u, Xv, Yv, rcond);
       // Factorisation LU
+      gmm::SuperLU_factor<double> SLUsys1;
@@ -1513,11 +1512,11 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
-#elif (GETFEM_PARA_LEVEL==1 && defined(GMM_USES_MUMPS))
+#elif defined(GMM_USES_MUMPS)
       //#elif (GETFEM_PARA_LEVEL==0 && GMM_USES_MUMPS)
       if (time_order == 1 || t == Tinitial+dt) { //time_order = 1 or first 
iterations with time_order = 2
         SLUsys2.solve(X, Z);
@@ -1575,11 +1574,11 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
-#elif (GETFEM_PARA_LEVEL==1 && defined(GMM_USES_MUMPS))
+#elif defined(GMM_USES_MUMPS)
       //#elif (GETFEM_PARA_LEVEL==0 && defined(GMM_USES_MUMPS))
       //SuperLU_solve(A2, X, Y, rcond);
       //SLUsys3.solve(X, Y);
diff --git a/contrib/xfem_contact/ 
index 3bda6b57..fc787ab7 100644
--- a/contrib/xfem_contact/
+++ b/contrib/xfem_contact/
@@ -647,9 +647,13 @@ int main(int argc, char *argv[]) {
       plain_vector BE(nb_dof), BS(nb_dof);
       for (dal::bv_visitor i(dof_black_list); !i.finished(); ++i) {
         BE[i] = scalar_type(1);
+#if defined(GMM_USES_SUPERLU)
         // TODO: store LU decomp.
         double rcond;
         gmm::SuperLU_solve(EO, BS, BE, rcond);
+        gmm::MUMPS_solve(EO, BS, BE);
         gmm::mult(gmm::transposed(T1), BS, gmm::mat_row(E1, i));
         BE[i] = scalar_type(0);
diff --git a/contrib/xfem_stab_unilat_contact/ 
index 2e150d1a..4f5a7ea4 100644
--- a/contrib/xfem_stab_unilat_contact/
+++ b/contrib/xfem_stab_unilat_contact/
@@ -950,22 +950,29 @@ struct matrix_G {
   const sparse_matrix &S;
   mutable plain_vector W1, W2;
+#if defined(GMM_USES_SUPERLU)
   gmm::SuperLU_factor<scalar_type> SLUF;
   matrix_G(const sparse_matrix &BB, const sparse_matrix &SS)
     : B(BB), S(SS), W1(gmm::mat_nrows(SS)), W2(gmm::mat_nrows(SS)) {
+#if defined(GMM_USES_SUPERLU)
 template <typename vector1, typename vector2>
 void mult(const matrix_G &G, const vector1 &X, vector2 &Y) {
   gmm::mult(gmm::transposed(G.B), X, G.W1);
   // gmm::iteration it(1E-6, 0);
   // gmm::cg(G.S, G.W2, G.W1,  gmm::identity_matrix(), it);
+#if defined(GMM_USES_SUPERLU)
   G.SLUF.solve(G.W2, G.W1);
+  gmm::MUMPS_solve(G.S, G.W2, G.W1);
   gmm::mult(G.B, G.W2, Y);
diff --git a/interface/src/getfemint_precond.h 
index 2fcdf808..d9a7a8c5 100644
--- a/interface/src/getfemint_precond.h
+++ b/interface/src/getfemint_precond.h
@@ -39,7 +39,7 @@
 #include <gmm/gmm_precond_ildltt.h>
 #include <gmm/gmm_precond_ilu.h>
 #include <gmm/gmm_precond_ilut.h>
-#include <getfem/getfem_superlu.h>
+#include <gmm/gmm_superlu_interface.h>
 #include <getfemint_gsparse.h>
 namespace getfemint {
@@ -69,7 +69,9 @@ namespace getfemint {
     std::unique_ptr<gmm::ildltt_precond<cscmat> > ildltt;
     std::unique_ptr<gmm::ilu_precond<cscmat> > ilu;
     std::unique_ptr<gmm::ilut_precond<cscmat> > ilut;
+#if defined(GMM_USES_SUPERLU)
     std::unique_ptr<gmm::SuperLU_factor<T> > superlu;
     virtual size_type memsize() const {
       size_type sz = sizeof(*this);
@@ -81,7 +83,11 @@ namespace getfemint {
       case ILDLT:   sz += ildlt->memsize(); break;
       case ILDLTT:  sz += ildltt->memsize(); break;
       case SUPERLU:
+#if defined(GMM_USES_SUPERLU)
         sz += size_type(superlu->memsize());
+        GMM_ASSERT1(false, "GetFEM built without SuperLU support");
       case SPMAT:   sz += gsp->memsize(); break;
@@ -140,8 +146,12 @@ namespace gmm {
         else gmm::transposed_mult(*precond.ilut, v, w);
       case getfemint::gprecond_base::SUPERLU:
+#if defined(GMM_USES_SUPERLU)
         if (do_mult) precond.superlu->solve(w,v);
         else precond.superlu->solve(w,v,gmm::SuperLU_factor<T>::LU_TRANSP);
+        GMM_ASSERT1(false, "GetFEM built without SuperLU support");
       case getfemint::gprecond_base::SPMAT:
         precond.gsp->mult_or_transposed_mult(v, w, !do_mult);
diff --git a/interface/src/ b/interface/src/
index c4c6f063..d3f7da65 100644
--- a/interface/src/
+++ b/interface/src/
@@ -24,7 +24,7 @@
 #include <getfemint.h>
 #include <gmm/gmm_iter_solvers.h>
 #include <getfemint_misc.h>
-#include <getfem/getfem_superlu.h>
+#include <gmm/gmm_superlu_interface.h>
 #include <gmm/gmm_MUMPS_interface.h>
 using namespace getfemint;
@@ -85,6 +85,7 @@ void iterative_gmm_solver(iterative_gmm_solver_type stype,
   else                  iterative_gmm_solver(stype, gsp, in, out, 
+#if defined(GMM_USES_SUPERLU)
 template <typename T> static void
 superlu_solver(gsparse &gsp,
                getfemint::mexargs_in& in, getfemint::mexargs_out& out, T) {
@@ -96,6 +97,7 @@ superlu_solver(gsparse &gsp,
   if (out.remaining())
     out.pop().from_scalar(rcond ? 1./rcond : 0.);
 #if defined(GMM_USES_MUMPS)
 template <typename T> static void
@@ -178,6 +180,7 @@ void gf_linsolve(getfemint::mexargs_in& m_in, 
getfemint::mexargs_out& m_out) {
+#if defined(GMM_USES_SUPERLU)
     /*@FUNC @CELL{U, cond} = ('lu', @tsp M, @vec b)
       Alias for ::LINSOLVE('superlu',...)@*/
@@ -203,6 +206,7 @@ void gf_linsolve(getfemint::mexargs_in& m_in, 
getfemint::mexargs_out& m_out) {
        if (gsp.is_complex()) superlu_solver(gsp, in, out, complex_type());
        else                  superlu_solver(gsp, in, out, scalar_type());
 #if defined(GMM_USES_MUMPS)
     /*@FUNC @CELL{U, cond} = ('mumps', @tsp M, @vec b)
diff --git a/interface/src/ b/interface/src/
index 970eeff7..dfd42514 100644
--- a/interface/src/
+++ b/interface/src/
@@ -68,6 +68,7 @@ precond_ilut(gsparse &M, int additional_fillin, double 
threshold, mexargs_out& o
   p.ilut = std::make_unique<gmm::ilut_precond<typename 
gprecond<T>::cscmat>>(, additional_fillin, threshold);
+#if defined(GMM_USES_SUPERLU)
 template <typename T> static void
 precond_superlu(gsparse &M, mexargs_out& out, T) {
   gprecond<T> &p = precond_new(out, T());
@@ -75,6 +76,7 @@ precond_superlu(gsparse &M, mexargs_out& out, T) {
   p.superlu = std::make_unique<gmm::SuperLU_factor<T>>();
 static void precond_spmat(gsparse *gsp, mexargs_out& out) {
   if (gsp->is_complex()) {
@@ -213,6 +215,7 @@ void gf_precond(getfemint::mexargs_in& m_in, 
getfemint::mexargs_out& m_out) {
                                         out, scalar_type());
+#if defined(GMM_USES_SUPERLU)
     /*@INIT PC = ('superlu', @tsp m)
       Uses SuperLU to build an exact factorization of the sparse matrix `m`.
       This preconditioner is only available if the getfem-interface was
@@ -224,6 +227,7 @@ void gf_precond(getfemint::mexargs_in& m_in, 
getfemint::mexargs_out& m_out) {
        if (M->is_complex()) precond_superlu(*M, out, complex_type());
        else                 precond_superlu(*M, out, scalar_type());
     /*@INIT PC = ('spmat', @tsp m)
       Preconditioner given explicitely by a sparse matrix.@*/
diff --git a/interface/src/gfi_array.h b/interface/src/gfi_array.h
index 835e8234..8173b657 100644
--- a/interface/src/gfi_array.h
+++ b/interface/src/gfi_array.h
@@ -33,7 +33,7 @@
 #define GFI_ARRAY
 #include <sys/types.h>
-#ifdef USE_RPC
 # include <rpc/types.h>
 # ifndef __u_char_defined
diff --git a/interface/src/octave/gfi_array.h b/interface/src/octave/gfi_array.h
index e12c6b2c..990905fa 100644
--- a/interface/src/octave/gfi_array.h
+++ b/interface/src/octave/gfi_array.h
@@ -33,7 +33,7 @@
 #define GFI_ARRAY
 #include <sys/types.h>
-#ifdef USE_RPC
 # include <rpc/types.h>
 # ifndef __u_char_defined
diff --git a/interface/src/python/getfem_python.c 
index eb9a4641..0f5c6f5d 100644
--- a/interface/src/python/getfem_python.c
+++ b/interface/src/python/getfem_python.c
@@ -795,18 +795,16 @@ getfem_env(PyObject *self, PyObject *args) {
     word_out = PyUnicode_FromString("";);
   } else if (strcmp(word_in,"license") == 0) {
     word_out = PyUnicode_FromString("GNU LGPL v3");
-  } else if (strcmp(word_in,"package") == 0) {
-    word_out = PyUnicode_FromString(GETFEM_PACKAGE);
-  } else if (strcmp(word_in,"package_name") == 0) {
+  } else if (strcmp(word_in,"package") == 0 ||
+             strcmp(word_in,"package_name") == 0) {
     word_out = PyUnicode_FromString(GETFEM_PACKAGE_NAME);
   } else if (strcmp(word_in,"package_string") == 0) {
     word_out = PyUnicode_FromString(GETFEM_PACKAGE_STRING);
-  } else if(strcmp(word_in,"package_tarname") == 0) {
+  } else if (strcmp(word_in,"package_tarname") == 0) {
     word_out = PyUnicode_FromString(GETFEM_PACKAGE_TARNAME);
-  } else if(strcmp(word_in,"package_version") == 0 ||
-            strcmp(word_in,"release") == 0) {
-    word_out = PyUnicode_FromString(GETFEM_PACKAGE_VERSION);
-  } else if(strcmp(word_in,"version") == 0) {
+  } else if (strcmp(word_in,"package_version") == 0 ||
+             strcmp(word_in,"release") == 0 ||
+             strcmp(word_in,"version") == 0) {
     word_out = PyUnicode_FromString(GETFEM_VERSION);
   } else {
     word_out = PyUnicode_FromString("");
diff --git a/src/ b/src/
index 1e10d741..c002f042 100644
--- a/src/
+++ b/src/
@@ -158,7 +158,6 @@ nobase_include_HEADERS =                            \
        getfem/getfem_nonlinear_elasticity.h            \
        getfem/getfem_fourth_order.h                    \
        getfem/getfem_Navier_Stokes.h                   \
-       getfem/getfem_superlu.h                         \
        getfem/getfem_plasticity.h                      \
        getfem/getfem_omp.h                             \
        getfem/getfem_continuation.h                    \
@@ -190,7 +189,6 @@ SRC =                                               \                                  \                                \                         \
-                               \                                  \                           \                               \
diff --git a/src/getfem/bgeot_config.h b/src/getfem/bgeot_config.h
index ee5e7783..9c7ce5e9 100644
--- a/src/getfem/bgeot_config.h
+++ b/src/getfem/bgeot_config.h
@@ -37,6 +37,7 @@
 #ifndef BGEOT_CONFIG_H__
 #define BGEOT_CONFIG_H__
+#include "gmm/gmm_arch_config.h"
 #include "getfem/getfem_arch_config.h"
diff --git a/src/getfem/ 
new file mode 100644
index 00000000..d50a5028
--- /dev/null
+++ b/src/getfem/
@@ -0,0 +1,49 @@
+/* manually created template to be used by autotools.  
+/* enable openblas to be multithreaded */
+/* defined if GetFEM is built with OpenMP parallelization */
+/* glibc backtrace function */
+/* defined if the cxxabi.h header file is available */
+/* glibc floating point exceptions control */
+/* defined if the <libqhull_r/qhull_ra.h> header file is available */
+/* defined if the Metis library was found and is working */
+/* defined if the Metis library found is older than version 4 */
+/* defined if the qd library was found and is working */
+/* GetFEM package name */
+/* GetFEM package string */
+/* GetFEM package tarname */
+/* Parallelization level (0|1|2) */
+/* defined if quad-doubles are to be used instead of double-double */
+/* Use rpc for getfem communication with matlab */
+/* GetFEM version */
diff --git a/src/getfem/getfem_config.h b/src/getfem/getfem_config.h
index b04219fe..64bd5e1d 100644
--- a/src/getfem/getfem_config.h
+++ b/src/getfem/getfem_config.h
@@ -167,13 +167,6 @@
 #define GETFEM_MPI_INIT(argc, argv) {GMM_TRACE1("Running sequential Getfem");}
-#if defined(GETFEM_HAVE_DMUMPS_C_H)
-# if !defined(GMM_USES_MUMPS)
-#   define GMM_USES_MUMPS
-# endif
 #if GMM_USES_MPI > 0
 # include <mpi.h>
@@ -190,28 +183,12 @@
 # define GETFEM_MPI_FINALIZE { MPI_Finalize(); }
-// GETFEM_PARA_SOLVER is the parallelisation solver used
-//    MUMPS       : use direct parallel solver MUMPS
-//    SCHWARZADD  : use a Schwarz additive method
-# endif
-#  ifndef GMM_USES_MUMPS
-#    define GMM_USES_MUMPS
-#  endif
-# endif
 #include "bgeot_tensor.h"
 #include "bgeot_poly.h"
-#include "getfem_superlu.h"
 /// GEneric Tool for Finite Element Methods.
 namespace getfem {
@@ -221,6 +198,17 @@ namespace getfem {
   using gmm::vref;
+// GETFEM_PARA_SOLVER is the parallelisation solver used
+//    MUMPS       : use direct parallel solver MUMPS
+//    SCHWARZADD  : use a Schwarz additive method
+# endif
   template <typename T> inline T MPI_SUM_SCALAR(T a) {
     T b; MPI_Allreduce(&a,&b,1,gmm::mpi_type(a), MPI_SUM, MPI_COMM_WORLD);
     return b;
diff --git a/src/getfem/getfem_model_solvers.h 
index 4472bb9e..da570c01 100644
--- a/src/getfem/getfem_model_solvers.h
+++ b/src/getfem/getfem_model_solvers.h
@@ -40,6 +40,7 @@
 #include "getfem_models.h"
+#include "gmm/gmm_superlu_interface.h"
 #include "gmm/gmm_MUMPS_interface.h"
 #include "gmm/gmm_iter.h"
 #include "gmm/gmm_iter_solvers.h"
@@ -141,6 +142,7 @@ namespace getfem {
+#if defined(GMM_USES_SUPERLU)
   template <typename MAT, typename VECT>
   struct linear_solver_superlu
     : public abstract_linear_solver<MAT, VECT> {
@@ -150,11 +152,12 @@ namespace getfem {
       /*gmm::HarwellBoeing_IO::write("test.hb", M);
       std::fstream f("bbb", std::ios::out);
       for (unsigned i=0; i < gmm::vect_size(b); ++i) f << b[i] << "\n";*/
-      int info = SuperLU_solve(M, x, b, rcond);
+      int info = gmm::SuperLU_solve(M, x, b, rcond);
       iter.enforce_converged(info == 0);
       if (iter.get_noisy()) cout << "condition number: " << 1.0/rcond<< endl;
   template <typename MAT, typename VECT>
   struct linear_solver_dense_lu : public abstract_linear_solver<MAT, VECT> {
@@ -640,8 +643,11 @@ namespace getfem {
         return std::make_shared<linear_solver_mumps_sym<MATRIX, VECTOR>>();
         return std::make_shared<linear_solver_mumps<MATRIX, VECTOR>>();
-# else
+# elif defined(GMM_USES_SUPERLU)
       return std::make_shared<linear_solver_superlu<MATRIX, VECTOR>>();
+# else
+      static_assert(false,
+                    "At least one direct solver (MUMPS or SuperLU) is 
 # endif
     else {
@@ -665,8 +671,13 @@ namespace getfem {
   std::shared_ptr<abstract_linear_solver<MATRIX, VECTOR>>
   select_linear_solver(const model &md, const std::string &name) {
     std::shared_ptr<abstract_linear_solver<MATRIX, VECTOR>> p;
-    if (bgeot::casecmp(name, "superlu") == 0)
+    if (bgeot::casecmp(name, "superlu") == 0) {
+#if defined(GMM_USES_SUPERLU)
       return std::make_shared<linear_solver_superlu<MATRIX, VECTOR>>();
+      GMM_ASSERT1(false, "SuperLU is not interfaced");
+    }
     else if (bgeot::casecmp(name, "dense_lu") == 0)
       return std::make_shared<linear_solver_dense_lu<MATRIX, VECTOR>>();
     else if (bgeot::casecmp(name, "mumps") == 0) {
@@ -723,10 +734,10 @@ namespace getfem {
   add a special traitement on the problem, etc ...  This is in
   fact a model for your own solver.
-  For small problems, a direct solver is used
-  (getfem::SuperLU_solve), for larger problems, a conjugate
-  gradient gmm::cg (if the problem is coercive) or a gmm::gmres is
-  used (preconditioned with an incomplete factorization).
+  For small problems, a direct solver is used (gmm::SuperLU_solve),
+  for larger problems, a conjugate gradient gmm::cg (if the problem is
+  coercive) or a gmm::gmres is used (preconditioned with an incomplete
+  factorization).
   When MPI/METIS is enabled, a partition is done via METIS, and a parallel
   solver can be used.
diff --git a/src/getfem/getfem_superlu.h b/src/getfem/getfem_superlu.h
deleted file mode 100644
index 06b7c738..00000000
--- a/src/getfem/getfem_superlu.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* -*- c++ -*- (enables emacs c++ mode) */
- Copyright (C) 2004-2020 Julien Pommier
- This file is a part of GetFEM
- GetFEM  is  free software;  you  can  redistribute  it  and/or modify it
- under  the  terms  of the  GNU  Lesser General Public License as published
- by  the  Free Software Foundation;  either version 3 of the License,  or
- (at your option) any later version along with the GCC Runtime Library
- Exception either version 3.1 or (at your option) any later version.
- This program  is  distributed  in  the  hope  that it will be useful,  but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
- License and GCC Runtime Library Exception for more details.
- You  should  have received a copy of the GNU Lesser General Public License
- along  with  this program;  if not, write to the Free Software Foundation,
- Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
- As a special exception, you  may use  this file  as it is a part of a free
- software  library  without  restriction.  Specifically,  if   other  files
- instantiate  templates  or  use macros or inline functions from this file,
- or  you compile this  file  and  link  it  with other files  to produce an
- executable, this file  does  not  by itself cause the resulting executable
- to be covered  by the GNU Lesser General Public License.  This   exception
- does not  however  invalidate  any  other  reasons why the executable file
- might be covered by the GNU Lesser General Public License.
-/**@file getfem_superlu.h
-   @author  Julien Pommier <>
-   @date August 2004.
-   @brief SuperLU interface for getfem
-   We do not use gmm_superlu_interface.h for a good reason. This file
-   does not include any of the superlu headers, hence when getfem is
-   installed, it does not need to install the superlu headers.
-#include "getfem_config.h"
-#include "gmm/gmm_kernel.h"
-namespace gmm {
-  template<typename T>
-  int SuperLU_solve(const gmm::csc_matrix<T> &A, T *X_, T *B, double& rcond_, 
int permc_spec = 3);
-  /** solve a sparse linear system AX=B (float, double, complex<float>
-      or complex<double>) via SuperLU.
-      @param A the matrix (a copy is made if A is not a gmm::csc_matrix)
-      @param X the solution.
-      @param B the right hand side.
-      @param rcond_ contains on output an estimate of the condition number of 
-      @param permc_spec specify the kind of renumbering than SuperLU should do.
-  */
-  template<typename MAT, typename V1, typename V2>
-  int SuperLU_solve(const MAT &A, const V1& X, const V2& B, double& rcond_, int permc_spec = 3) {
-    typedef typename gmm::linalg_traits<MAT>::value_type T;
-    int m = int(mat_nrows(A)), n = int(mat_ncols(A));
-    gmm::csc_matrix<T> csc_A(m,n); 
-    gmm::copy(A,csc_A);
-    std::vector<T> rhs(m), sol(m);
-    gmm::copy(B, rhs);
-    int info = SuperLU_solve(csc_A, &sol[0], &rhs[0], rcond_, permc_spec);
-    gmm::copy(sol, const_cast<V1 &>(X));
-    return info;
-  }
-  struct SuperLU_factor_impl_common;
-  /** Factorization of a sparse matrix with SuperLU.
-      This class can be used as a preconditioner for gmm iterative solvers.
-  */
-  template <class T> class SuperLU_factor {
-    std::shared_ptr<SuperLU_factor_impl_common> impl;
-  public :
-    /** Do the factorization of the supplied sparse matrix. */
-    template <class MAT> void build_with(const MAT &A,  int permc_spec = 3) {
-      int m = int(mat_nrows(A)), n = int(mat_ncols(A));
-      gmm::csc_matrix<T> csc_A(m,n); 
-      gmm::copy(A,csc_A);
-      build_with(csc_A, permc_spec);
-    }
-    void build_with(const gmm::csc_matrix<T> &A, int permc_spec = 3);
-    template <typename VECTX, typename VECTB> 
-    /** After factorization, do the triangular solves.
-       transp = LU_NOTRANSP   -> solves Ax = B
-       transp = LU_TRANSP     -> solves A'x = B
-       transp = LU_CONJUGATED -> solves conj(A)X = B 
-    */
-    void solve(const VECTX &X, const VECTB &B, int transp=LU_NOTRANSP) const {
-      gmm::copy(B, rhs());
-      solve(transp);
-      gmm::copy(sol(),const_cast<VECTX &>(X));
-    }
-    void solve(int transp=LU_NOTRANSP) const;
-    std::vector<T> &sol() const;
-    std::vector<T> &rhs() const;
-    SuperLU_factor();
-    float memsize() const;
-    SuperLU_factor(const SuperLU_factor& other);
-    SuperLU_factor& operator=(const SuperLU_factor& other);
-  };
-  template <typename T, typename V1, typename V2> inline
-  void mult(const SuperLU_factor<T>& P, const V1 &v1, const V2 &v2) {
-    P.solve(v2,v1);
-  }
-  template <typename T, typename V1, typename V2> inline
-  void transposed_mult(const SuperLU_factor<T>& P,const V1 &v1,const V2 &v2) {
-    P.solve(v2, v1, SuperLU_factor<T>::LU_TRANSP);
-  }
-extern "C" void set_superlu_callback(int (*cb)());
diff --git a/src/ b/src/
deleted file mode 100644
index fe7d2803..00000000
--- a/src/
+++ /dev/null
@@ -1,430 +0,0 @@
- Copyright (C) 2004-2020 Julien Pommier
- This file is a part of GetFEM
- GetFEM  is  free software;  you  can  redistribute  it  and/or modify it
- under  the  terms  of the  GNU  Lesser General Public License as published
- by  the  Free Software Foundation;  either version 3 of the License,  or
- (at your option) any later version along with the GCC Runtime Library
- Exception either version 3.1 or (at your option) any later version.
- This program  is  distributed  in  the  hope  that it will be useful,  but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
- License and GCC Runtime Library Exception for more details.
- You  should  have received a copy of the GNU Lesser General Public License
- along  with  this program;  if not, write to the Free Software Foundation,
- Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
-#include "getfem/getfem_superlu.h"
-typedef int int_t;
-/* because slu_util.h defines TRUE and FALSE ... */
-#ifdef TRUE
-# undef TRUE
-#ifdef FALSE
-# undef FALSE
-#include "superlu/slu_Cnames.h"
-#include "superlu/supermatrix.h"
-#include "superlu/slu_util.h"
-namespace SuperLU_S {
-#include "superlu/slu_sdefs.h"
-namespace SuperLU_D {
-#include "superlu/slu_ddefs.h"
-namespace SuperLU_C {
-#include "superlu/slu_cdefs.h"
-namespace SuperLU_Z {
-#include "superlu/slu_zdefs.h"
-namespace gmm {
-  /*  interface for Create_CompCol_Matrix */
-  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
-                                    float *a, int *ir, int *jc) {
-    SuperLU_S::sCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc,
-                                      SLU_NC, SLU_S, SLU_GE);
-  }
-  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
-                                    double *a, int *ir, int *jc) {
-    SuperLU_D::dCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc,
-                                      SLU_NC, SLU_D, SLU_GE);
-  }
-  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
-                                    std::complex<float> *a, int *ir, int *jc) {
-    SuperLU_C::cCreate_CompCol_Matrix(A, m, n, nnz, (SuperLU_C::complex *)(a),
-                                      ir, jc, SLU_NC, SLU_C, SLU_GE);
-  }
-  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
-                             std::complex<double> *a, int *ir, int *jc) {
-    SuperLU_Z::zCreate_CompCol_Matrix(A, m, n, nnz,
-                                      (SuperLU_Z::doublecomplex *)(a), ir, jc,
-                                      SLU_NC, SLU_Z, SLU_GE);
-  }
-  /*  interface for Create_Dense_Matrix */
-  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, float *a, int 
-  { SuperLU_S::sCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_S, SLU_GE); }
-  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, double *a, int 
-  { SuperLU_D::dCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_D, SLU_GE); }
-  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n,
-                                  std::complex<float> *a, int k) {
-    SuperLU_C::cCreate_Dense_Matrix(A, m, n, (SuperLU_C::complex *)(a),
-                                    k, SLU_DN, SLU_C, SLU_GE);
-  }
-  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n,
-                                  std::complex<double> *a, int k) {
-    SuperLU_Z::zCreate_Dense_Matrix(A, m, n, (SuperLU_Z::doublecomplex *)(a),
-                                    k, SLU_DN, SLU_Z, SLU_GE);
-  }
-  /*  interface for gssv */
-  inline void SuperLU_gssv(superlu_options_t *options, SuperMatrix *A, int *p, 
-  int *q, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,               \
-  SuperLUStat_t *stats, int *info, KEYTYPE) {                           \
-  NAMESPACE::FNAME(options, A, p, q, L, U, B, stats, info);             \
-  }
-  DECL_GSSV(SuperLU_S,sgssv,float,float)
-  DECL_GSSV(SuperLU_C,cgssv,float,std::complex<float>)
-  DECL_GSSV(SuperLU_D,dgssv,double,double)
-  DECL_GSSV(SuperLU_Z,zgssv,double,std::complex<double>)
-  /*  interface for gssvx */
-    inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A,     
-                               int *perm_c, int *perm_r, int *etree, char 
*equed,  \
-                               FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L,     
-                               SuperMatrix *U, void *work, int lwork,          
-                               SuperMatrix *B, SuperMatrix *X,                 
-                               FLOATTYPE *recip_pivot_growth,                  
-                               FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE 
*berr, \
-                               SuperLUStat_t *stats, int *info, KEYTYPE) {     
-    NAMESPACE::mem_usage_t mem_usage;                                    \
-    NAMESPACE::FNAME(options, A, perm_c, perm_r, etree, equed, R, C, L,  \
-                     U, work, lwork, B, X, recip_pivot_growth, rcond,    \
-                     ferr, berr, &mem_usage, stats, info);               \
-    return mem_usage.for_lu; /* bytes used by the factor storage */     \
-  }
-  DECL_GSSVX(SuperLU_S,sgssvx,float,float)
-  DECL_GSSVX(SuperLU_C,cgssvx,float,std::complex<float>)
-  DECL_GSSVX(SuperLU_D,dgssvx,double,double)
-  DECL_GSSVX(SuperLU_Z,zgssvx,double,std::complex<double>)
-  /* ********************************************************************* */
-  /*   SuperLU solve interface                                             */
-  /* ********************************************************************* */
-  template<typename T>
-  int SuperLU_solve(const gmm::csc_matrix<T> &csc_A, T *sol, T *rhs,
-                    double& rcond_, int permc_spec) {
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = 0: use the natural ordering
-     *   permc_spec = 1: use minimum degree ordering on structure of A'*A
-     *   permc_spec = 2: use minimum degree ordering on structure of A'+A
-     *   permc_spec = 3: use approximate minimum degree column ordering
-     */
-    typedef typename gmm::number_traits<T>::magnitude_type R;
-    int m = int(mat_nrows(csc_A)), n = int(mat_ncols(csc_A));
-    int nrhs = 1, info = 0, nz = int(nnz(csc_A));
-    GMM_ASSERT1(nz != 0, "Cannot factor a matrix full of zeros!");
-    GMM_ASSERT1(n == m, "Cannot factor a non-square matrix");
-    if ((2 * nz / n) >= m)
-      GMM_WARNING2("CAUTION : it seems that SuperLU has a problem"
-                   " for nearly dense sparse matrices");
-    superlu_options_t options;
-    set_default_options(&options);
-    options.ColPerm = NATURAL;
-    options.PrintStat = NO;
-    options.ConditionNumber = YES;
-    switch (permc_spec) {
-    case 1 : options.ColPerm = MMD_ATA; break;
-    case 2 : options.ColPerm = MMD_AT_PLUS_A; break;
-    case 3 : options.ColPerm = COLAMD; break;
-    }
-    SuperLUStat_t stat;
-    StatInit(&stat);
-    SuperMatrix SA, SL, SU, SB, SX; // SuperLU format.
-    Create_CompCol_Matrix(&SA, m, n, nz, const_cast<T*>(&[0]),
-                          const_cast<int *>((const int *)(&[0])),
-                          const_cast<int *>((const int *)(&csc_A.jc[0])));
-    Create_Dense_Matrix(&SB, m, nrhs, &rhs[0], m);
-    Create_Dense_Matrix(&SX, m, nrhs, &sol[0], m);
-    memset(&SL,0,sizeof SL);
-    memset(&SU,0,sizeof SU);
-    std::vector<int> etree(n);
-    char equed[] = "B";
-    std::vector<R> Rscale(m),Cscale(n); // row scale factors
-    std::vector<R> ferr(nrhs), berr(nrhs);
-    R recip_pivot_gross, rcond;
-    std::vector<int> perm_r(m), perm_c(n);
-    SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
-                  &etree[0] /* output */, equed /* output         */,
-                  &Rscale[0] /* row scale factors (output)        */,
-                  &Cscale[0] /* col scale factors (output)        */,
-                  &SL /* fact L (output)*/, &SU /* fact U (output)*/,
-                  NULL /* work                                    */,
-                  0 /* lwork: superlu auto allocates (input)      */,
-                  &SB /* rhs */, &SX /* solution                  */,
-                  &recip_pivot_gross /* reciprocal pivot growth   */
-                  /* factor max_j( norm(A_j)/norm(U_j) ).         */,
-                  &rcond /*estimate of the reciprocal condition   */
-                  /* number of the matrix A after equilibration   */,
-                  &ferr[0] /* estimated forward error             */,
-                  &berr[0] /* relative backward error             */,
-                  &stat, &info, T());
-    rcond_ = rcond;
-    if (SB.Store) Destroy_SuperMatrix_Store(&SB);
-    if (SX.Store) Destroy_SuperMatrix_Store(&SX);
-    if (SA.Store) Destroy_SuperMatrix_Store(&SA);
-    if (SL.Store) Destroy_SuperNode_Matrix(&SL);
-    if (SU.Store) Destroy_CompCol_Matrix(&SU);
-    StatFree(&stat);
-    GMM_ASSERT1(info != -333333333, "SuperLU was cancelled."); // user interruption (for matlab interface)
-    GMM_ASSERT1(info >= 0, "SuperLU solve failed: info =" << info);
-    if (info > 0) GMM_WARNING1("SuperLU solve failed: info =" << info);
-    return info;
-  }
-  template int SuperLU_solve(const gmm::csc_matrix<float> &csc_A, float *sol, float *rhs, double& rcond_, int permc_spec);
-  template int SuperLU_solve(const gmm::csc_matrix<double> &csc_A, double *sol, double *rhs, double& rcond_, int permc_spec);
-  template int SuperLU_solve(const gmm::csc_matrix<std::complex<float> > &csc_A, std::complex<float> *sol, std::complex<float> *rhs, double& rcond_, int permc_spec);
-  template int SuperLU_solve(const gmm::csc_matrix<std::complex<double> > &csc_A, std::complex<double> *sol, std::complex<double> *rhs, double& rcond_, int permc_spec);
-  struct SuperLU_factor_impl_common {
-    mutable SuperMatrix SA, SL, SB, SU, SX;
-    mutable SuperLUStat_t stat;
-    mutable superlu_options_t options;
-    float memory_used;
-    mutable bool is_init;
-    mutable char equed;
-    void free_supermatrix() {
-      if (is_init) {
-        if (SB.Store) Destroy_SuperMatrix_Store(&SB);
-        if (SX.Store) Destroy_SuperMatrix_Store(&SX);
-        if (SA.Store) Destroy_SuperMatrix_Store(&SA);
-        if (SL.Store) Destroy_SuperNode_Matrix(&SL);
-        if (SU.Store) Destroy_CompCol_Matrix(&SU);
-      }
-    }
-    SuperLU_factor_impl_common() : is_init(false) {}
-    virtual ~SuperLU_factor_impl_common() { free_supermatrix(); }
-  };
-  template <typename T> struct SuperLU_factor_impl : public SuperLU_factor_impl_common {
-    typedef typename gmm::number_traits<T>::magnitude_type R;
-    std::vector<int> etree, perm_r, perm_c;
-    std::vector<R> Rscale, Cscale;
-    std::vector<R> ferr, berr;
-    std::vector<T> rhs;
-    std::vector<T> sol;
-    void build_with(const gmm::csc_matrix<T> &A, int permc_spec);
-    void solve(int transp);
-  };
-  template <typename T>
-  void SuperLU_factor_impl<T>::build_with(const gmm::csc_matrix<T> &A, int permc_spec) {
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = 0: use the natural ordering
-     *   permc_spec = 1: use minimum degree ordering on structure of A'*A
-     *   permc_spec = 2: use minimum degree ordering on structure of A'+A
-     *   permc_spec = 3: use approximate minimum degree column ordering
-     */
-    free_supermatrix();
-    int n = int(mat_nrows(A)), m = int(mat_ncols(A)), info = 0;
-    rhs.resize(m); sol.resize(m);
-    gmm::clear(rhs);
-    int nz = int(nnz(A));
-    GMM_ASSERT1(nz != 0, "Cannot factor a matrix full of zeros!");
-    GMM_ASSERT1(n == m, "Cannot factor a non-square matrix");
-    set_default_options(&options);
-    options.ColPerm = NATURAL;
-    options.PrintStat = NO;
-    options.ConditionNumber = NO;
-    switch (permc_spec) {
-      case 1 : options.ColPerm = MMD_ATA; break;
-      case 2 : options.ColPerm = MMD_AT_PLUS_A; break;
-      case 3 : options.ColPerm = COLAMD; break;
-    }
-    StatInit(&stat);
-    Create_CompCol_Matrix(&SA, m, n, nz, const_cast<T*>(&[0]),
-                          const_cast<int *>((const int *)(&[0])),
-                          const_cast<int *>((const int *)(&A.jc[0])));
-    Create_Dense_Matrix(&SB, m, 0, &rhs[0], m);
-    Create_Dense_Matrix(&SX, m, 0, &sol[0], m);
-    memset(&SL,0,sizeof SL);
-    memset(&SU,0,sizeof SU);
-    equed = 'B';
-    Rscale.resize(m); Cscale.resize(n); etree.resize(n);
-    ferr.resize(1); berr.resize(1);
-    R recip_pivot_gross, rcond;
-    perm_r.resize(m); perm_c.resize(n);
-    memory_used = SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
-                                &etree[0] /* output */, &equed /* output       
-                                &Rscale[0] /* row scale factors (output)       
-                                &Cscale[0] /* col scale factors (output)       
-                                &SL /* fact L (output)*/, &SU /* fact U 
-                                NULL /* work                                   
-                                0 /* lwork: superlu auto allocates (input)     
-                                &SB /* rhs */, &SX /* solution                 
-                                &recip_pivot_gross /* reciprocal pivot growth  
-                                /* factor max_j( norm(A_j)/norm(U_j) ).        
-                                &rcond /*estimate of the reciprocal condition  
-                                /* number of the matrix A after equilibration  
-                                &ferr[0] /* estimated forward error            
-                                &berr[0] /* relative backward error            
-                                &stat, &info, T());
-    Destroy_SuperMatrix_Store(&SB);
-    Destroy_SuperMatrix_Store(&SX);
-    Create_Dense_Matrix(&SB, m, 1, &rhs[0], m);
-    Create_Dense_Matrix(&SX, m, 1, &sol[0], m);
-    StatFree(&stat);
-    GMM_ASSERT1(info != -333333333, "SuperLU was cancelled.");
-    GMM_ASSERT1(info == 0, "SuperLU solve failed: info=" << info);
-    is_init = true;
-  }
-  template <typename T>
-  void SuperLU_factor_impl<T>::solve(int transp) {
-    options.Fact = FACTORED;
-    options.IterRefine = NOREFINE;
-    switch (transp) {
-      case SuperLU_factor<T>::LU_NOTRANSP: options.Trans = NOTRANS; break;
-      case SuperLU_factor<T>::LU_TRANSP: options.Trans = TRANS; break;
-      case SuperLU_factor<T>::LU_CONJUGATED: options.Trans = CONJ; break;
-      default: GMM_ASSERT1(false, "invalid value for transposition option");
-    }
-    StatInit(&stat);
-    int info = 0;
-    R recip_pivot_gross, rcond;
-    SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
-                  &etree[0] /* output */, &equed /* output        */,
-                  &Rscale[0] /* row scale factors (output)        */,
-                  &Cscale[0] /* col scale factors (output)        */,
-                  &SL /* fact L (output)*/, &SU /* fact U (output)*/,
-                  NULL /* work                                    */,
-                  0 /* lwork: superlu auto allocates (input)      */,
-                  &SB /* rhs */, &SX /* solution                  */,
-                  &recip_pivot_gross /* reciprocal pivot growth   */
-                  /* factor max_j( norm(A_j)/norm(U_j) ).         */,
-                  &rcond /*estimate of the reciprocal condition   */
-                  /* number of the matrix A after equilibration   */,
-                  &ferr[0] /* estimated forward error             */,
-                  &berr[0] /* relative backward error             */,
-                  &stat, &info, T());
-    StatFree(&stat);
-    GMM_ASSERT1(info == 0, "SuperLU solve failed: info=" << info);
-  }
-  template<typename T> void
-  SuperLU_factor<T>::build_with(const gmm::csc_matrix<T> &A, int permc_spec) {
-    ((SuperLU_factor_impl<T>*)impl.get())->build_with(A,permc_spec);
-  }
-  template<typename T> void
-  SuperLU_factor<T>::solve(int transp) const {
-    ((SuperLU_factor_impl<T>*)impl.get())->solve(transp);
-  }
-  template<typename T> std::vector<T> &
-  SuperLU_factor<T>::sol() const {
-    return ((SuperLU_factor_impl<T>*)impl.get())->sol;
-  }
-  template<typename T> std::vector<T> &
-  SuperLU_factor<T>::rhs() const {
-    return ((SuperLU_factor_impl<T>*)impl.get())->rhs;
-  }
-  template<typename T>
-  SuperLU_factor<T>::SuperLU_factor() {
-    impl = std::make_shared<SuperLU_factor_impl<T>>();
-  }
-  template<typename T>
-  SuperLU_factor<T>::SuperLU_factor(const SuperLU_factor& other) {
-    impl =  std::make_shared<SuperLU_factor_impl<T>>();
-    GMM_ASSERT1(!(other.impl->is_init),
-                "copy of initialized SuperLU_factor is forbidden");
-    other.impl->is_init = false;
-  }
-  template<typename T> SuperLU_factor<T>&
-  SuperLU_factor<T>::operator=(const SuperLU_factor& other) {
-    GMM_ASSERT1(!(other.impl->is_init) && !(impl->is_init),
-                "assignment of initialized SuperLU_factor is forbidden");
-    return *this;
-  }
-  template<typename T> float
-  SuperLU_factor<T>::memsize() const {
-    return impl->memory_used;
-  }
-  /*  void force_instantiation() {
-    SuperLU_factor<float> a;
-    SuperLU_factor<double> b;
-    SuperLU_factor<std::complex<float> > c;
-    SuperLU_factor<std::complex<double> > d;
-    //a = 0; b = 0; c = 0; d = 0;
-  }
-  */
-template class gmm::SuperLU_factor<float>;
-template class gmm::SuperLU_factor<double>;
-template class gmm::SuperLU_factor<std::complex<float> >;
-template class gmm::SuperLU_factor<std::complex<double> >;
-static int (*superlu_callback)();
-/* this one is called from superlu (see dcolumn_bmod) */
-extern "C" int handle_getfem_callback() {
-  if (superlu_callback) return superlu_callback();
-  else return 0;
-extern "C" void set_superlu_callback(int (*cb)()) {
-  superlu_callback = cb;
diff --git a/src/gmm/ b/src/gmm/
new file mode 100644
index 00000000..0d46b9ab
--- /dev/null
+++ b/src/gmm/
@@ -0,0 +1,25 @@
+/* manually created template to be used by autotools.  */
+/* gcc style __PRETTY_FUNCTION__ macro */
+/* defined if GMM is linked to a blas library */
+/* defined if GMM is linked to a lapack library */
+/* defined if GMM uses MPI */
+#undef GMM_USES_MPI
+/* defined if GMM is linked to the mumps library */
+/* defined if GMM is linked to the superlu library */
+/* Use blas with 64 bits integers */
+/* GMM version */
diff --git a/src/gmm/gmm_solver_Schwarz_additive.h b/src/gmm/gmm_solver_Schwarz_additive.h
index 2d6e9fdd..378534e1 100644
--- a/src/gmm/gmm_solver_Schwarz_additive.h
+++ b/src/gmm/gmm_solver_Schwarz_additive.h
@@ -39,7 +39,11 @@
 #include "gmm_kernel.h"
+#if defined(GMM_USES_SUPERLU)
 #include "gmm_superlu_interface.h"
+#else
+#include "gmm_MUMPS_interface.h"
+#endif
 #include "gmm_solver_cg.h"
 #include "gmm_solver_gmres.h"
 #include "gmm_solver_bicgstab.h"
@@ -568,8 +572,12 @@ namespace gmm {
       gmm::mult(M.vM[i], p, v);
       gmm::mult(gmm::transposed(M.vB[i]), v, w);
+#if defined(GMM_USES_SUPERLU)
       double rcond;
-      SuperLU_solve(M.vMloc[i], x, w, rcond);
+      gmm::SuperLU_solve(M.vMloc[i], x, w, rcond);
+#else
+      gmm::MUMPS_solve(M.vMloc[i], x, w);
+#endif
       // gmm::iteration iter(1E-10, 0, 100000);
       //gmm::gmres(M.vMloc[i], x, w, gmm::identity_matrix(), 50, iter);
       gmm::mult_add(M.vB[i], x, q);
diff --git a/src/gmm/gmm_superlu_interface.h b/src/gmm/gmm_superlu_interface.h
index 964efd43..aecaa875 100644
--- a/src/gmm/gmm_superlu_interface.h
+++ b/src/gmm/gmm_superlu_interface.h
@@ -34,7 +34,7 @@
    @date October 17, 2003.
    @brief Interface with SuperLU (LU direct solver for sparse matrices).
-#if defined(GMM_USES_SUPERLU) && !defined(GETFEM_VERSION)
+#if defined(GMM_USES_SUPERLU)
@@ -144,11 +144,12 @@ namespace gmm {
                                FLOATTYPE *recip_pivot_growth,                  \
                                FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE *berr, \
                                SuperLUStat_t *stats, int *info, KEYTYPE) {     \
-    NAMESPACE::mem_usage_t mem_usage;                                    \
+    mem_usage_t mem_usage;                                                     \
+    GlobalLU_t Glu;                                                            \
     NAMESPACE::FNAME(options, A, perm_c, perm_r, etree, equed, R, C, L,  \
                      U, work, lwork, B, X, recip_pivot_growth, rcond,    \
-                     ferr, berr, &mem_usage, stats, info);               \
-    return mem_usage.for_lu; /* bytes used by the factor storage */     \
+                     ferr, berr, &Glu, &mem_usage, stats, info);         \
+    return mem_usage.for_lu; /* bytes used by the factor storage */      \
diff --git a/superlu/BLAS.c b/superlu/BLAS.c
deleted file mode 100644
index 21df5005..00000000
--- a/superlu/BLAS.c
+++ /dev/null
@@ -1,43902 +0,0 @@
-/* BLAS.f -- translated by f2c
-   You must link the resulting object file with the libraries:
-       -lf2c -lm   (in that order)
-   the f2c-ed file has been slightly modified (removal of lsame_, added r_sign)
-   Original fortran source files are distributed along with this package in the sub-directory BLAS
-  The reference BLAS is a freely-available software package. It is available from netlib via anonymous ftp
-  and the World Wide Web. Thus, it can be included in commercial software packages (and has been). We only
-  ask that proper credit be given to the authors.
-  Like all software, it is copyrighted. It is not trademarked, but we do ask the following:
-  If you modify the source for these routines we ask that you change the name of the routine and comment
-  the changes made to the original.
-  We will gladly answer any questions regarding the software. If a modification is done, however, it is the
-  responsibility of the person who modified the routine to provide support.
-  see
-/* Copyright (C) 2004-2020 Julien Pommier
-  This file is a part of GetFEM++
-  GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
-  under  the  terms  of the  GNU  Lesser General Public License as published
-  by  the  Free Software Foundation;  either  version 3  of the License,  or
-  (at your option) any later version along with the GCC Runtime Library
-  Exception either version 3.1 or (at your option) any later version.
-  This program  is  distributed  in  the  hope  that it will be useful,  but
-  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-  or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-  License and GCC Runtime Library Exception for more details.
-  You  should  have received a copy of the GNU Lesser General Public License
-  along  with  this program;  if not, write to the Free Software Foundation,
-  Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
-#include "BLAS_f2c.h"
-/* Table of constant values */
-static complex c_b21 = {1.f,0.f};
-static doublereal c_b876 = 1.;
-static real c_b1543 = 1.f;
-static integer c__1 = 1;
-static doublecomplex c_b2094 = {1.,0.};
-/* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer *
-       incx, complex *cy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    real r__1, r__2;
-    complex q__1, q__2;
-    /* Builtin functions */
-    double r_imag(complex *);
-    /* Local variables */
-    static integer i__, ix, iy;
-/*     constant times a vector plus a vector. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if ((r__1 = ca->r, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = iy;
-       i__3 = iy;
-       i__4 = ix;
-       q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[
-               i__4].i + ca->i * cx[i__4].r;
-       q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i;
-       cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*        code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       i__4 = i__;
-       q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[
-               i__4].i + ca->i * cx[i__4].r;
-       q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i;
-       cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
-/* L30: */
-    }
-    return 0;
-} /* caxpy_ */
-/* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex *
-       cy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    /* Local variables */
-    static integer i__, ix, iy;
-/*     copies a vector, x, to a vector, y. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = iy;
-       i__3 = ix;
-       cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*        code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i;
-/* L30: */
-    }
-    return 0;
-} /* ccopy_ */
-/* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer 
-       *incx, complex *cy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, ix, iy;
-    static complex ctemp;
-/*     forms the dot product of two vectors, conjugating the first */
-/*     vector. */
-/*     jack dongarra, linpack,  3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-    /* Function Body */
-    ctemp.r = 0.f, ctemp.i = 0.f;
-     ret_val->r = 0.f,  ret_val->i = 0.f;
-    if (*n <= 0) {
-       return ;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       r_cnjg(&q__3, &cx[ix]);
-       i__2 = iy;
-       q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * 
-               cy[i__2].i + q__3.i * cy[i__2].r;
-       q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
-    return ;
-/*        code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       r_cnjg(&q__3, &cx[i__]);
-       i__2 = i__;
-       q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * 
-               cy[i__2].i + q__3.i * cy[i__2].r;
-       q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-/* L30: */
-    }
-     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
-    return ;
-} /* cdotc_ */
-/* Complex */ VOID cdotu_(complex * ret_val, integer *n, complex *cx, integer 
-       *incx, complex *cy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    complex q__1, q__2;
-    /* Local variables */
-    static integer i__, ix, iy;
-    static complex ctemp;
-/*     forms the dot product of two vectors. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-    /* Function Body */
-    ctemp.r = 0.f, ctemp.i = 0.f;
-     ret_val->r = 0.f,  ret_val->i = 0.f;
-    if (*n <= 0) {
-       return ;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       i__3 = iy;
-       q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = 
-               cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r;
-       q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
-    return ;
-/*        code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = 
-               cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r;
-       q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-/* L30: */
-    }
-     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
-    return ;
-} /* cdotu_ */
-/* Subroutine */ int cgbmv_(char *trans, integer *m, integer *n, integer *kl, 
-       integer *ku, complex *alpha, complex *a, integer *lda, complex *x, 
-       integer *incx, complex *beta, complex *y, integer *incy, ftnlen 
-       trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info;
-    static complex temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CGBMV  performs one of the matrix-vector operations */
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or */
-/*     y := alpha*conjg( A' )*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n band matrix, with kl sub-diagonals and ku super-diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-/*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  KL     - INTEGER. */
-/*           On entry, KL specifies the number of sub-diagonals of the */
-/*           matrix A. KL must satisfy  0 .le. KL. */
-/*           Unchanged on exit. */
-/*  KU     - INTEGER. */
-/*           On entry, KU specifies the number of super-diagonals of the */
-/*           matrix A. KU must satisfy  0 .le. KU. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading ( kl + ku + 1 ) by n part of the */
-/*           array A must contain the matrix of coefficients, supplied */
-/*           column by column, with the leading diagonal of the matrix in */
-/*           row ( ku + 1 ) of the array, the first super-diagonal */
-/*           starting at position 2 in row ku, the first sub-diagonal */
-/*           starting at position 1 in row ( ku + 2 ), and so on. */
-/*           Elements in the array A that do not correspond to elements */
-/*           in the band matrix (such as the top left ku by ku triangle) */
-/*           are not referenced. */
-/*           The following program segment will transfer a band matrix */
-/*           from conventional full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    K = KU + 1 - J */
-/*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */
-/*                       A( K + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( kl + ku + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*kl < 0) {
-       info = 4;
-    } else if (*ku < 0) {
-       info = 5;
-    } else if (*lda < *kl + *ku + 1) {
-       info = 8;
-    } else if (*incx == 0) {
-       info = 10;
-    } else if (*incy == 0) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("CGBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r 
-           == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the band part of A. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1.f || beta->i != 0.f) {
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-    kup1 = *ku + 1;
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   i__2 = jx;
-                   q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       i__2 = i__;
-                       i__3 = i__;
-                       i__5 = k + i__ + j * a_dim1;
-                       q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + 
-                               q__2.i;
-                       y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__4 = jx;
-               if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-                   i__4 = jx;
-                   q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, 
-                           q__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4]
-                           .r;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   iy = ky;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       i__4 = iy;
-                       i__2 = iy;
-                       i__5 = k + i__ + j * a_dim1;
-                       q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + 
-                               q__2.i;
-                       y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               if (j > *ku) {
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0.f, temp.i = 0.f;
-               k = kup1 - j;
-               if (noconj) {
-/* Computing MAX */
-                   i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__2 = min(i__5,i__6);
-                   for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                       i__3 = k + i__ + j * a_dim1;
-                       i__4 = i__;
-                       q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                   }
-               } else {
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       r_cnjg(&q__3, &a[k + i__ + j * a_dim1]);
-                       i__2 = i__;
-                       q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                               q__2.i = q__3.r * x[i__2].i + q__3.i * x[i__2]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                   }
-               }
-               i__4 = jy;
-               i__2 = jy;
-               q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-               y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-               jy += *incy;
-/* L110: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0.f, temp.i = 0.f;
-               ix = kx;
-               k = kup1 - j;
-               if (noconj) {
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       i__4 = k + i__ + j * a_dim1;
-                       i__2 = ix;
-                       q__2.r = a[i__4].r * x[i__2].r - a[i__4].i * x[i__2]
-                               .i, q__2.i = a[i__4].r * x[i__2].i + a[i__4]
-                               .i * x[i__2].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       ix += *incx;
-/* L120: */
-                   }
-               } else {
-/* Computing MAX */
-                   i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__2 = min(i__5,i__6);
-                   for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                       r_cnjg(&q__3, &a[k + i__ + j * a_dim1]);
-                       i__3 = ix;
-                       q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                               q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       ix += *incx;
-/* L130: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jy += *incy;
-               if (j > *ku) {
-                   kx += *incx;
-               }
-/* L140: */
-           }
-       }
-    }
-    return 0;
-/*     End of CGBMV . */
-} /* cgbmv_ */
-/* Subroutine */ int cgemm_(char *transa, char *transb, integer *m, integer *
-       n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, 
-       integer *ldb, complex *beta, complex *c__, integer *ldc, ftnlen 
-       transa_len, ftnlen transb_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    complex q__1, q__2, q__3, q__4;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, l, info;
-    static logical nota, notb;
-    static complex temp;
-    static logical conja, conjb;
-    static integer ncola;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa, nrowb;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CGEMM  performs one of the matrix-matrix operations */
-/*     C := alpha*op( A )*op( B ) + beta*C, */
-/*  where  op( X ) is one of */
-/*     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ), */
-/*  alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
-/*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n',  op( A ) = A. */
-/*              TRANSA = 'T' or 't',  op( A ) = A'. */
-/*              TRANSA = 'C' or 'c',  op( A ) = conjg( A' ). */
-/*           Unchanged on exit. */
-/*           On entry, TRANSB specifies the form of op( B ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSB = 'N' or 'n',  op( B ) = B. */
-/*              TRANSB = 'T' or 't',  op( B ) = B'. */
-/*              TRANSB = 'C' or 'c',  op( B ) = conjg( B' ). */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies  the number  of rows  of the  matrix */
-/*           op( A )  and of the  matrix  C.  M  must  be at least  zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N  specifies the number  of columns of the matrix */
-/*           op( B ) and the number of columns of the matrix C. N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry,  K  specifies  the number of columns of the matrix */
-/*           op( A ) and the number of rows of the matrix op( B ). K must */
-/*           be at least  zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. */
-/*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by m  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is */
-/*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. */
-/*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  n by k  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then */
-/*           LDB must be at least  max( 1, k ), otherwise  LDB must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n  matrix */
-/*           ( alpha*op( A )*op( B ) + beta*C ). */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not */
-/*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and */
-/*     B  respectively are to be  transposed but  not conjugated  and set */
-/*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A */
-/*     and the number of rows of  B  respectively. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    nota = lsame_(transa, "N", (ftnlen)1, (ftnlen)1);
-    notb = lsame_(transb, "N", (ftnlen)1, (ftnlen)1);
-    conja = lsame_(transa, "C", (ftnlen)1, (ftnlen)1);
-    conjb = lsame_(transb, "C", (ftnlen)1, (ftnlen)1);
-    if (nota) {
-       nrowa = *m;
-       ncola = *k;
-    } else {
-       nrowa = *k;
-       ncola = *m;
-    }
-    if (notb) {
-       nrowb = *k;
-    } else {
-       nrowb = *n;
-    }
-/*     Test the input parameters. */
-    info = 0;
-    if (! nota && ! conja && ! lsame_(transa, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! notb && ! conjb && ! lsame_(transb, "T", (ftnlen)1, (ftnlen)
-           1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < max(1,nrowa)) {
-       info = 8;
-    } else if (*ldb < max(1,nrowb)) {
-       info = 10;
-    } else if (*ldc < max(1,*m)) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("CGEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) 
-           && (beta->r == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (beta->r == 0.f && beta->i == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (notb) {
-       if (nota) {
-/*           Form  C := alpha*A*B + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L60: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = l + j * b_dim1;
-                   if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                       i__3 = l + j * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L70: */
-                       }
-                   }
-/* L80: */
-               }
-/* L90: */
-           }
-       } else if (conja) {
-/*           Form  C := alpha*conjg( A' )*B + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, 
-                               q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L110: */
-               }
-/* L120: */
-           }
-       } else {
-/*           Form  C := alpha*A'*B + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L130: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L140: */
-               }
-/* L150: */
-           }
-       }
-    } else if (nota) {
-       if (conjb) {
-/*           Form  C := alpha*A*conjg( B' ) + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L160: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L170: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * b_dim1;
-                   if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                       r_cnjg(&q__2, &b[j + l * b_dim1]);
-                       q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, 
-                               q__1.i = alpha->r * q__2.i + alpha->i * 
-                               q__2.r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L180: */
-                       }
-                   }
-/* L190: */
-               }
-/* L200: */
-           }
-       } else {
-/*           Form  C := alpha*A*B'          + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L210: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L220: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * b_dim1;
-                   if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                       i__3 = j + l * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L230: */
-                       }
-                   }
-/* L240: */
-               }
-/* L250: */
-           }
-       }
-    } else if (conja) {
-       if (conjb) {
-/*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       r_cnjg(&q__4, &b[j + l * b_dim1]);
-                       q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = 
-                               q__3.r * q__4.i + q__3.i * q__4.r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L260: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L270: */
-               }
-/* L280: */
-           }
-       } else {
-/*           Form  C := alpha*conjg( A' )*B' + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = j + l * b_dim1;
-                       q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, 
-                               q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L290: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L300: */
-               }
-/* L310: */
-           }
-       }
-    } else {
-       if (conjb) {
-/*           Form  C := alpha*A'*conjg( B' ) + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       r_cnjg(&q__3, &b[j + l * b_dim1]);
-                       q__2.r = a[i__4].r * q__3.r - a[i__4].i * q__3.i, 
-                               q__2.i = a[i__4].r * q__3.i + a[i__4].i * 
-                               q__3.r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L320: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L330: */
-               }
-/* L340: */
-           }
-       } else {
-/*           Form  C := alpha*A'*B' + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = j + l * b_dim1;
-                       q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L350: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L360: */
-               }
-/* L370: */
-           }
-       }
-    }
-    return 0;
-/*     End of CGEMM . */
-} /* cgemm_ */
-/* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex *
-       alpha, complex *a, integer *lda, complex *x, integer *incx, complex *
-       beta, complex *y, integer *incy, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static complex temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CGEMV  performs one of the matrix-vector operations */
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or */
-/*     y := alpha*conjg( A' )*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-/*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry with BETA non-zero, the incremented array Y */
-/*           must contain the vector y. On exit, Y is overwritten by the */
-/*           updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*lda < max(1,*m)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("CGEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r 
-           == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1.f || beta->i != 0.f) {
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   i__2 = jx;
-                   q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__;
-                       i__4 = i__;
-                       i__5 = i__ + j * a_dim1;
-                       q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + 
-                               q__2.i;
-                       y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   i__2 = jx;
-                   q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   iy = ky;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = iy;
-                       i__4 = iy;
-                       i__5 = i__ + j * a_dim1;
-                       q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + 
-                               q__2.i;
-                       y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0.f, temp.i = 0.f;
-               if (noconj) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__;
-                       q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                   }
-               } else {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                       i__3 = i__;
-                       q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                               q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jy += *incy;
-/* L110: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0.f, temp.i = 0.f;
-               ix = kx;
-               if (noconj) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = ix;
-                       q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       ix += *incx;
-/* L120: */
-                   }
-               } else {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                       i__3 = ix;
-                       q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                               q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       ix += *incx;
-/* L130: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jy += *incy;
-/* L140: */
-           }
-       }
-    }
-    return 0;
-/*     End of CGEMV . */
-} /* cgemv_ */
-/* Subroutine */ int cgerc_(integer *m, integer *n, complex *alpha, complex *
-       x, integer *incx, complex *y, integer *incy, complex *a, integer *lda)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static complex temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CGERC  performs the rank 1 operation */
-/*     A := alpha*x*conjg( y' ) + A, */
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CGERC ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0.f || y[i__2].i != 0.f) {
-               r_cnjg(&q__2, &y[jy]);
-               q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                       alpha->r * q__2.i + alpha->i * q__2.r;
-               temp.r = q__1.r, temp.i = q__1.i;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = i__;
-                   q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
-                   a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0.f || y[i__2].i != 0.f) {
-               r_cnjg(&q__2, &y[jy]);
-               q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                       alpha->r * q__2.i + alpha->i * q__2.r;
-               temp.r = q__1.r, temp.i = q__1.i;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = ix;
-                   q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
-                   a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-    return 0;
-/*     End of CGERC . */
-} /* cgerc_ */
-/* Subroutine */ int cgeru_(integer *m, integer *n, complex *alpha, complex *
-       x, integer *incx, complex *y, integer *incy, complex *a, integer *lda)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2;
-    /* Local variables: declared static, so their storage is shared by all calls */
-    static integer i__, j, ix, jy, kx, info;
-    static complex temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CGERU  performs the rank 1 operation */
-/*     A := alpha*x*y' + A, */
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. The elements of y are used as given
-    (no conjugate is taken below), so y' denotes the plain transpose.
-    The routine always returns 0. */
-/*  Parameters */
-/*  ========== */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments: shift the pointers so that the 1-based indices
-       used below (x[1], y[1], a[1 + 1 * a_dim1]) address the first elements */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0; /* becomes the 1-based position of the first invalid argument */
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CGERU ", &info, (ftnlen)6); /* pass the offending position on; A has not been touched */
-       return 0;
-    }
-/*     Quick return if possible: m == 0, n == 0 or alpha == 0 leaves A as is. */
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy; /* negative stride: begin at the highest index and step downwards */
-    }
-    if (*incx == 1) { /* unit stride in x: the row index i addresses x directly */
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0.f || y[i__2].i != 0.f) { /* a zero y(jy) leaves column j unchanged */
-               i__2 = jy;
-               q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i =
-                        alpha->r * y[i__2].i + alpha->i * y[i__2].r;
-               temp.r = q__1.r, temp.i = q__1.i; /* temp = alpha * y(jy) */
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = i__;
-                   q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
-                   a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* a(i,j) += x(i) * temp */
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else { /* general stride in x */
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx; /* negative stride: begin at the highest index */
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0.f || y[i__2].i != 0.f) { /* a zero y(jy) leaves column j unchanged */
-               i__2 = jy;
-               q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i =
-                        alpha->r * y[i__2].i + alpha->i * y[i__2].r;
-               temp.r = q__1.r, temp.i = q__1.i; /* temp = alpha * y(jy) */
-               ix = kx; /* restart the walk through x for every column */
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = ix;
-                   q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
-                   a[i__3].r = q__1.r, a[i__3].i = q__1.i; /* a(i,j) += x(ix) * temp */
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-    return 0;
-/*     End of CGERU . */
-} /* cgeru_ */
-/* Subroutine */ int chbmv_(char *uplo, integer *n, integer *k, complex *
-       alpha, complex *a, integer *lda, complex *x, integer *incx, complex *
-       beta, complex *y, integer *incy, ftnlen uplo_len) /* uplo_len is never read in the body */
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables: declared static, so their storage is shared by all calls */
-    static integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHBMV  performs the matrix-vector  operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian band matrix, with k super-diagonals.
-    Only one triangle of the band is stored; the other half of each product
-    is formed from the conjugates of the stored entries.
-    The routine always returns 0. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments: shift the pointers so that the 1-based indices
-       used below (a[1 + 1 * a_dim1], x[1], y[1]) address the first elements */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0; /* becomes the 1-based position of the first invalid argument */
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*k < 0) {
-       info = 3;
-    } else if (*lda < *k + 1) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("CHBMV ", &info, (ftnlen)6); /* pass the offending position on; y has not been touched */
-       return 0;
-    }
-/*     Quick return if possible: n == 0, or alpha == 0 together with beta == 1. */
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && 
-           beta->i == 0.f)) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx; /* negative stride: begin at the highest index */
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy; /* negative stride: begin at the highest index */
-    }
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1.f || beta->i != 0.f) { /* beta == 1 needs no scaling pass */
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) { /* beta == 0: store zeros without reading y */
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i; /* y(i) = beta * y(i) */
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) { /* beta == 0: store zeros without reading y */
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i; /* y(iy) = beta * y(iy) */
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) { /* alpha == 0: the scaling by beta was all there is to do */
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when upper triangle of A is stored. */
-       kplus1 = *k + 1; /* band row used for the main diagonal below */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i; /* temp1 = alpha * x(j) */
-               temp2.r = 0.f, temp2.i = 0.f;
-               l = kplus1 - j; /* a[l + i + j * a_dim1] then holds matrix entry (i,j) */
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *k;
-               i__4 = j - 1;
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   i__5 = l + i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i; /* y(i) += temp1 * a(i,j) */
-                   r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                   i__2 = i__;
-                   q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i =
-                            q__3.r * x[i__2].i + q__3.i * x[i__2].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i; /* temp2 += conj(a(i,j)) * x(i) */
-/* L50: */
-               }
-               i__4 = j;
-               i__2 = j;
-               i__3 = kplus1 + j * a_dim1;
-               r__1 = a[i__3].r; /* only the real part of the diagonal entry is read */
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__4].r = q__1.r, y[i__4].i = q__1.i; /* y(j) += temp1 * re(a(j,j)) + alpha * temp2 */
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__4 = jx;
-               q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i =
-                        alpha->r * x[i__4].i + alpha->i * x[i__4].r;
-               temp1.r = q__1.r, temp1.i = q__1.i; /* temp1 = alpha * x(jx) */
-               temp2.r = 0.f, temp2.i = 0.f;
-               ix = kx; /* kx and ky track the position of the first row visited in column j */
-               iy = ky;
-               l = kplus1 - j; /* a[l + i + j * a_dim1] then holds matrix entry (i,j) */
-/* Computing MAX */
-               i__4 = 1, i__2 = j - *k;
-               i__3 = j - 1;
-               for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                   i__4 = iy;
-                   i__2 = iy;
-                   i__5 = l + i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-                   y[i__4].r = q__1.r, y[i__4].i = q__1.i; /* y(iy) += temp1 * a(i,j) */
-                   r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = ix;
-                   q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
-                            q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i; /* temp2 += conj(a(i,j)) * x(ix) */
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__3 = jy;
-               i__4 = jy;
-               i__2 = kplus1 + j * a_dim1;
-               r__1 = a[i__2].r; /* only the real part of the diagonal entry is read */
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i; /* y(jy) += temp1 * re(a(j,j)) + alpha * temp2 */
-               jx += *incx;
-               jy += *incy;
-               if (j > *k) { /* from here on the first row visited moves down by one per column */
-                   kx += *incx;
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when lower triangle of A is stored. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__3 = j;
-               q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i =
-                        alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-               temp1.r = q__1.r, temp1.i = q__1.i; /* temp1 = alpha * x(j) */
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__3 = j;
-               i__4 = j;
-               i__2 = j * a_dim1 + 1;
-               r__1 = a[i__2].r; /* diagonal entry sits in band row 1; only its real part is read */
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i; /* y(j) += temp1 * re(a(j,j)) */
-               l = 1 - j; /* a[l + i + j * a_dim1] then holds matrix entry (i,j) */
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   i__4 = i__;
-                   i__2 = i__;
-                   i__5 = l + i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-                   y[i__4].r = q__1.r, y[i__4].i = q__1.i; /* y(i) += temp1 * a(i,j) */
-                   r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = i__;
-                   q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
-                            q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i; /* temp2 += conj(a(i,j)) * x(i) */
-/* L90: */
-               }
-               i__3 = j;
-               i__4 = j;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i; /* y(j) += alpha * temp2 */
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__3 = jx;
-               q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i =
-                        alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-               temp1.r = q__1.r, temp1.i = q__1.i; /* temp1 = alpha * x(jx) */
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__3 = jy;
-               i__4 = jy;
-               i__2 = j * a_dim1 + 1;
-               r__1 = a[i__2].r; /* diagonal entry sits in band row 1; only its real part is read */
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i; /* y(jy) += temp1 * re(a(j,j)) */
-               l = 1 - j; /* a[l + i + j * a_dim1] then holds matrix entry (i,j) */
-               ix = jx;
-               iy = jy;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   ix += *incx; /* advanced before use, so ix and iy address row i */
-                   iy += *incy;
-                   i__4 = iy;
-                   i__2 = iy;
-                   i__5 = l + i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-                   y[i__4].r = q__1.r, y[i__4].i = q__1.i; /* y(iy) += temp1 * a(i,j) */
-                   r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = ix;
-                   q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
-                            q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i; /* temp2 += conj(a(i,j)) * x(ix) */
-/* L110: */
-               }
-               i__3 = jy;
-               i__4 = jy;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i; /* y(jy) += alpha * temp2 */
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of CHBMV . */
-} /* chbmv_ */
-/* Subroutine */ int chemm_(char *side, char *uplo, integer *m, integer *n, 
-       complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, 
-       complex *beta, complex *c__, integer *ldc, ftnlen side_len, ftnlen 
-       uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    real r__1;
-    complex q__1, q__2, q__3, q__4, q__5;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, k, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHEMM  performs one of the matrix-matrix operations */
-/*     C := alpha*A*B + beta*C, */
-/*  or */
-/*     C := alpha*B*A + beta*C, */
-/*  where alpha and beta are scalars, A is an hermitian matrix and  B and */
-/*  C are m by n matrices. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  hermitian matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  hermitian  matrix   A  is  to  be */
-/*           referenced as follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  hermitian matrix is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  hermitian matrix is to be referenced. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is n  otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  hermitian matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  hermitian matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  hermitian */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  hermitian matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  hermitian matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  hermitian */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Note that the imaginary parts  of the diagonal elements need */
-/*           not be set, they are assumed to be zero. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX          array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set NROWA as the number of rows of A. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-/*     Test the input parameters. */
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("CHEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r 
-           == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-/*     And when  alpha.eq.zero. */
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (beta->r == 0.f && beta->i == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * b_dim1;
-                   q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                           q__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + j * c_dim1;
-                       i__6 = k + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, 
-                               q__2.i = temp1.r * a[i__6].i + temp1.i * a[
-                               i__6].r;
-                       q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                               q__2.i;
-                       c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-                       i__4 = k + j * b_dim1;
-                       r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                       q__2.r = b[i__4].r * q__3.r - b[i__4].i * q__3.i, 
-                               q__2.i = b[i__4].r * q__3.i + b[i__4].i * 
-                               q__3.r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L50: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + i__ * a_dim1;
-                       r__1 = a[i__4].r;
-                       q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       i__5 = i__ + i__ * a_dim1;
-                       r__1 = a[i__5].r;
-                       q__4.r = r__1 * temp1.r, q__4.i = r__1 * temp1.i;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   i__2 = i__ + j * b_dim1;
-                   q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                           q__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       i__3 = k + j * c_dim1;
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               q__2.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       q__1.r = c__[i__4].r + q__2.r, q__1.i = c__[i__4].i + 
-                               q__2.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       i__3 = k + j * b_dim1;
-                       r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                       q__2.r = b[i__3].r * q__3.r - b[i__3].i * q__3.i, 
-                               q__2.i = b[i__3].r * q__3.i + b[i__3].i * 
-                               q__3.r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L80: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + i__ * a_dim1;
-                       r__1 = a[i__3].r;
-                       q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
-                   } else {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3]
-                               .i, q__3.i = beta->r * c__[i__3].i + beta->i *
-                                c__[i__3].r;
-                       i__4 = i__ + i__ * a_dim1;
-                       r__1 = a[i__4].r;
-                       q__4.r = r__1 * temp1.r, q__4.i = r__1 * temp1.i;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*B*A + beta*C. */
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = j + j * a_dim1;
-           r__1 = a[i__2].r;
-           q__1.r = r__1 * alpha->r, q__1.i = r__1 * alpha->i;
-           temp1.r = q__1.r, temp1.i = q__1.i;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * b_dim1;
-                   q__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, 
-                           q__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4]
-                           .r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__2.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   i__5 = i__ + j * b_dim1;
-                   q__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, 
-                           q__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5]
-                           .r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = k + j * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               } else {
-                   r_cnjg(&q__2, &a[j + k * a_dim1]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   q__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           q__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                           q__2.i;
-                   c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   r_cnjg(&q__2, &a[j + k * a_dim1]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               } else {
-                   i__3 = k + j * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   q__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           q__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                           q__2.i;
-                   c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-    return 0;
-/*     End of CHEMM . */
-} /* chemm_ */
-/* Subroutine */ int chemv_(char *uplo, integer *n, complex *alpha, complex *
-       a, integer *lda, complex *x, integer *incx, complex *beta, complex *y,
-        integer *incy, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHEMV  performs the matrix-vector  operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*lda < max(1,*n)) {
-       info = 5;
-    } else if (*incx == 0) {
-       info = 7;
-    } else if (*incy == 0) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("CHEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && 
-           beta->i == 0.f)) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1.f || beta->i != 0.f) {
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when A is stored in upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                   i__3 = i__;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L50: */
-               }
-               i__2 = j;
-               i__3 = j;
-               i__4 = j + j * a_dim1;
-               r__1 = a[i__4].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               ix = kx;
-               iy = ky;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                   i__3 = ix;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = j + j * a_dim1;
-               r__1 = a[i__4].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when A is stored in lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = j;
-               i__3 = j;
-               i__4 = j + j * a_dim1;
-               r__1 = a[i__4].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                   i__3 = i__;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L90: */
-               }
-               i__2 = j;
-               i__3 = j;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = j + j * a_dim1;
-               r__1 = a[i__4].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               ix = jx;
-               iy = jy;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                   i__3 = ix;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L110: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of CHEMV . */
-} /* chemv_ */
-/* Subroutine */ int cher_(char *uplo, integer *n, real *alpha, complex *x, 
-       integer *incx, complex *a, integer *lda, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHER   performs the hermitian rank 1 operation */
-/*     A := alpha*x*conjg( x' ) + A, */
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n hermitian matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*lda < max(1,*n)) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("CHER  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-/*     Set the start point in X if the increment is not unity. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when A is stored in upper triangle. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[j]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + 
-                               q__2.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L10: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   q__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, q__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[jx]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   ix = kx;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + 
-                               q__2.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-                       ix += *incx;
-/* L30: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   q__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, q__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-               jx += *incx;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when A is stored in lower triangle. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[j]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   q__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, q__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + 
-                               q__2.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L50: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[jx]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   q__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, q__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-                   ix = jx;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + 
-                               q__2.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of CHER  . */
-} /* cher_ */
-/* Subroutine */ int cher2_(char *uplo, integer *n, complex *alpha, complex *
-       x, integer *incx, complex *y, integer *incy, complex *a, integer *lda,
-        ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHER2  performs the hermitian rank 2 operation */
-/*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */
-/*  where alpha is a scalar, x and y are n element vectors and A is an n */
-/*  by n hermitian matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*n)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CHER2 ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when A is stored in the upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[j]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = j;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + 
-                               q__3.i;
-                       i__6 = i__;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L10: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[jy]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = jx;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ix = kx;
-                   iy = ky;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + 
-                               q__3.i;
-                       i__6 = iy;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-               jx += *incx;
-               jy += *incy;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when A is stored in the lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[j]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = j;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + 
-                               q__3.i;
-                       i__6 = i__;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L50: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[jy]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = jx;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-                   ix = jx;
-                   iy = jy;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       iy += *incy;
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + 
-                               q__3.i;
-                       i__6 = iy;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of CHER2 . */
-} /* cher2_ */
-/* Subroutine */ int cher2k_(char *uplo, char *trans, integer *n, integer *k, 
-       complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, 
-       real *beta, complex *c__, integer *ldc, ftnlen uplo_len, ftnlen 
-       trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6, i__7;
-    real r__1;
-    complex q__1, q__2, q__3, q__4, q__5, q__6;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, l, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHER2K  performs one of the hermitian rank 2k operations */
-/*     C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, */
-/*  or */
-/*     C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, */
-/*  where  alpha and beta  are scalars with  beta  real,  C is an  n by n */
-/*  hermitian matrix and  A and B  are  n by k matrices in the first case */
-/*  and  k by n  matrices in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          + */
-/*                                         conjg( alpha )*B*conjg( A' ) + */
-/*                                         beta*C. */
-/*              TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          + */
-/*                                         conjg( alpha )*conjg( B' )*A + */
-/*                                         beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
-/*           matrices  A and B.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set,  they are assumed to be zero,  and on exit they */
-/*           are set to zero. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. */
-/*     Ed Anderson, Cray Research Inc. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "C", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("CHER2K", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && *beta ==
-            1.f) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (upper) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + */
-/*                   C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L90: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L100: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 
-                           0.f || b[i__4].i != 0.f)) {
-                       r_cnjg(&q__2, &b[j + l * b_dim1]);
-                       q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, 
-                               q__1.i = alpha->r * q__2.i + alpha->i * 
-                               q__2.r;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__3 = j + l * a_dim1;
-                       q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__2.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       r_cnjg(&q__1, &q__2);
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-                       i__3 = j - 1;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, q__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
-                                   .i + q__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           q__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, q__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + 
-                                   q__4.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L110: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, 
-                               q__2.i = a[i__5].r * temp1.i + a[i__5].i * 
-                               temp1.r;
-                       i__6 = j + l * b_dim1;
-                       q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, 
-                               q__3.i = b[i__6].r * temp2.i + b[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       r__1 = c__[i__4].r + q__1.r;
-                       c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L140: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L150: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 
-                           0.f || b[i__4].i != 0.f)) {
-                       r_cnjg(&q__2, &b[j + l * b_dim1]);
-                       q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, 
-                               q__1.i = alpha->r * q__2.i + alpha->i * 
-                               q__2.r;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__3 = j + l * a_dim1;
-                       q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__2.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       r_cnjg(&q__1, &q__2);
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-                       i__3 = *n;
-                       for (i__ = j + 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, q__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
-                                   .i + q__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           q__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, q__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + 
-                                   q__4.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L160: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, 
-                               q__2.i = a[i__5].r * temp1.i + a[i__5].i * 
-                               temp1.r;
-                       i__6 = j + l * b_dim1;
-                       q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, 
-                               q__3.i = b[i__6].r * temp2.i + b[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       r__1 = c__[i__4].r + q__1.r;
-                       c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + */
-/*                   C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1.r = 0.f, temp1.i = 0.f;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, 
-                               q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
-                               .r;
-                       q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       r_cnjg(&q__3, &b[l + i__ * b_dim1]);
-                       i__4 = l + j * a_dim1;
-                       q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, 
-                               q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
-                               .r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L190: */
-                   }
-                   if (i__ == j) {
-                       if (*beta == 0.f) {
-                           i__3 = j + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           r__1 = q__1.r;
-                           c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       } else {
-                           i__3 = j + j * c_dim1;
-                           i__4 = j + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           r__1 = *beta * c__[i__4].r + q__1.r;
-                           c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       }
-                   } else {
-                       if (*beta == 0.f) {
-                           i__3 = i__ + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       } else {
-                           i__3 = i__ + j * c_dim1;
-                           i__4 = i__ + j * c_dim1;
-                           q__3.r = *beta * c__[i__4].r, q__3.i = *beta * 
-                                   c__[i__4].i;
-                           q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__4.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + 
-                                   q__4.i;
-                           r_cnjg(&q__6, alpha);
-                           q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, 
-                                   q__5.i = q__6.r * temp2.i + q__6.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + 
-                                   q__5.i;
-                           c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       }
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1.r = 0.f, temp1.i = 0.f;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, 
-                               q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
-                               .r;
-                       q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       r_cnjg(&q__3, &b[l + i__ * b_dim1]);
-                       i__4 = l + j * a_dim1;
-                       q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, 
-                               q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
-                               .r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L220: */
-                   }
-                   if (i__ == j) {
-                       if (*beta == 0.f) {
-                           i__3 = j + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           r__1 = q__1.r;
-                           c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       } else {
-                           i__3 = j + j * c_dim1;
-                           i__4 = j + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           r__1 = *beta * c__[i__4].r + q__1.r;
-                           c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       }
-                   } else {
-                       if (*beta == 0.f) {
-                           i__3 = i__ + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       } else {
-                           i__3 = i__ + j * c_dim1;
-                           i__4 = i__ + j * c_dim1;
-                           q__3.r = *beta * c__[i__4].r, q__3.i = *beta * 
-                                   c__[i__4].i;
-                           q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__4.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + 
-                                   q__4.i;
-                           r_cnjg(&q__6, alpha);
-                           q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, 
-                                   q__5.i = q__6.r * temp2.i + q__6.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + 
-                                   q__5.i;
-                           c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       }
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of CHER2K. */
-} /* cher2k_ */
-/* Subroutine */ int cherk_(char *uplo, char *trans, integer *n, integer *k, 
-       real *alpha, complex *a, integer *lda, real *beta, complex *c__, 
-       integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    real r__1;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, l, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static real rtemp;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHERK  performs one of the hermitian rank k operations */
-/*     C := alpha*A*conjg( A' ) + beta*C, */
-/*  or */
-/*     C := alpha*conjg( A' )*A + beta*C, */
-/*  where  alpha and beta  are  real scalars,  C is an  n by n  hermitian */
-/*  matrix and  A  is an  n by k  matrix in the  first case and a  k by n */
-/*  matrix in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C. */
-/*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
-/*           matrix A.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set,  they are assumed to be zero,  and on exit they */
-/*           are set to zero. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. */
-/*     Ed Anderson, Cray Research Inc. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "C", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("CHERK ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.f) {
-       if (upper) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*conjg( A' ) + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L90: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L100: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                       r_cnjg(&q__2, &a[j + l * a_dim1]);
-                       q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = j - 1;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L110: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = i__ + l * a_dim1;
-                       q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       r__1 = c__[i__4].r + q__1.r;
-                       c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L140: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L150: */
-                   }
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                       r_cnjg(&q__2, &a[j + l * a_dim1]);
-                       q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       r__1 = c__[i__4].r + q__1.r;
-                       c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       i__3 = *n;
-                       for (i__ = j + 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*conjg( A' )*A + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * a_dim1;
-                       q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, 
-                               q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L190: */
-                   }
-                   if (*beta == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[
-                               i__4].i;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L200: */
-               }
-               rtemp = 0.f;
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   r_cnjg(&q__3, &a[l + j * a_dim1]);
-                   i__3 = l + j * a_dim1;
-                   q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i =
-                            q__3.r * a[i__3].i + q__3.i * a[i__3].r;
-                   q__1.r = rtemp + q__2.r, q__1.i = q__2.i;
-                   rtemp = q__1.r;
-/* L210: */
-               }
-               if (*beta == 0.f) {
-                   i__2 = j + j * c_dim1;
-                   r__1 = *alpha * rtemp;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *alpha * rtemp + *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-/* L220: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               rtemp = 0.f;
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   r_cnjg(&q__3, &a[l + j * a_dim1]);
-                   i__3 = l + j * a_dim1;
-                   q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i =
-                            q__3.r * a[i__3].i + q__3.i * a[i__3].r;
-                   q__1.r = rtemp + q__2.r, q__1.i = q__2.i;
-                   rtemp = q__1.r;
-/* L230: */
-               }
-               if (*beta == 0.f) {
-                   i__2 = j + j * c_dim1;
-                   r__1 = *alpha * rtemp;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *alpha * rtemp + *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * a_dim1;
-                       q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, 
-                               q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L240: */
-                   }
-                   if (*beta == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[
-                               i__4].i;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L250: */
-               }
-/* L260: */
-           }
-       }
-    }
-    return 0;
-/*     End of CHERK . */
-} /* cherk_ */
-/* Subroutine */ int chpmv_(char *uplo, integer *n, complex *alpha, complex *
-       ap, complex *x, integer *incx, complex *beta, complex *y, integer *
-       incy, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHPMV  performs the matrix-vector operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 6;
-    } else if (*incy == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CHPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && 
-           beta->i == 0.f)) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1.f || beta->i != 0.f) {
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when AP contains the upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               k = kk;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = k;
-                   q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &ap[k]);
-                   i__3 = i__;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ++k;
-/* L50: */
-               }
-               i__2 = j;
-               i__3 = j;
-               i__4 = kk + j - 1;
-               r__1 = ap[i__4].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               kk += j;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               ix = kx;
-               iy = ky;
-               i__2 = kk + j - 2;
-               for (k = kk; k <= i__2; ++k) {
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = k;
-                   q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &ap[k]);
-                   i__3 = ix;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = kk + j - 1;
-               r__1 = ap[i__4].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when AP contains the lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = j;
-               i__3 = j;
-               i__4 = kk;
-               r__1 = ap[i__4].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               k = kk + 1;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = k;
-                   q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &ap[k]);
-                   i__3 = i__;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ++k;
-/* L90: */
-               }
-               i__2 = j;
-               i__3 = j;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               kk += *n - j + 1;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = kk;
-               r__1 = ap[i__4].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               ix = jx;
-               iy = jy;
-               i__2 = kk + *n - j;
-               for (k = kk + 1; k <= i__2; ++k) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = k;
-                   q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &ap[k]);
-                   i__3 = ix;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L110: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-               kk += *n - j + 1;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of CHPMV . */
-} /* chpmv_ */
-/* Subroutine */ int chpr_(char *uplo, integer *n, real *alpha, complex *x, 
-       integer *incx, complex *ap, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHPR    performs the hermitian rank 1 operation */
-/*     A := alpha*x*conjg( x' ) + A, */
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n hermitian matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --ap;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    }
-    if (info != 0) {
-       xerbla_("CHPR  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-/*     Set the start point in X if the increment is not unity. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when upper triangle is stored in AP. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[j]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = ap[i__4].r + q__2.r, q__1.i = ap[i__4].i + 
-                               q__2.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ++k;
-/* L10: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = j;
-                   q__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, q__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[jx]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   ix = kx;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = ap[i__4].r + q__2.r, q__1.i = ap[i__4].i + 
-                               q__2.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ix += *incx;
-/* L30: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = jx;
-                   q__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, q__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               jx += *incx;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when lower triangle is stored in AP. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[j]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = j;
-                   q__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, q__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = ap[i__4].r + q__2.r, q__1.i = ap[i__4].i + 
-                               q__2.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ++k;
-/* L50: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[jx]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = jx;
-                   q__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, q__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-                   ix = jx;
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = ap[i__4].r + q__2.r, q__1.i = ap[i__4].i + 
-                               q__2.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               jx += *incx;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of CHPR  . */
-} /* chpr_ */
-/* Subroutine */ int chpr2_(char *uplo, integer *n, complex *alpha, complex *
-       x, integer *incx, complex *y, integer *incy, complex *ap, ftnlen 
-       uplo_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5, i__6;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CHPR2  performs the hermitian rank 2 operation */
-/*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */
-/*  where alpha is a scalar, x and y are n element vectors and A is an */
-/*  n by n hermitian matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --ap;
-    --y;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("CHPR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when upper triangle is stored in AP. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[j]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = j;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = ap[i__4].r + q__3.r, q__2.i = ap[i__4].i + 
-                               q__3.i;
-                       i__6 = i__;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ++k;
-/* L10: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = j;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[jy]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = jx;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ix = kx;
-                   iy = ky;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = ap[i__4].r + q__3.r, q__2.i = ap[i__4].i + 
-                               q__3.i;
-                       i__6 = iy;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = jx;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when lower triangle is stored in AP. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[j]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = j;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = j;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = ap[i__4].r + q__3.r, q__2.i = ap[i__4].i + 
-                               q__3.i;
-                       i__6 = i__;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ++k;
-/* L50: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[jy]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = jx;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = jx;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-                   ix = jx;
-                   iy = jy;
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       iy += *incy;
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = ap[i__4].r + q__3.r, q__2.i = ap[i__4].i + 
-                               q__3.i;
-                       i__6 = iy;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               jx += *incx;
-               jy += *incy;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of CHPR2 . */
-} /* chpr2_ */
-/* Subroutine */ int crotg_(complex *ca, complex *cb, real *c__, complex *s)
-    /* System generated locals */
-    real r__1, r__2;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    double c_abs(complex *), sqrt(doublereal);
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static real norm;
-    static complex alpha;
-    static real scale;
-    if (c_abs(ca) != 0.f) {
-       goto L10;
-    }
-    *c__ = 0.f;
-    s->r = 1.f, s->i = 0.f;
-    ca->r = cb->r, ca->i = cb->i;
-    goto L20;
-    scale = c_abs(ca) + c_abs(cb);
-    q__1.r = ca->r / scale, q__1.i = ca->i / scale;
-/* Computing 2nd power */
-    r__1 = c_abs(&q__1);
-    q__2.r = cb->r / scale, q__2.i = cb->i / scale;
-/* Computing 2nd power */
-    r__2 = c_abs(&q__2);
-    norm = scale * sqrt(r__1 * r__1 + r__2 * r__2);
-    r__1 = c_abs(ca);
-    q__1.r = ca->r / r__1, q__1.i = ca->i / r__1;
-    alpha.r = q__1.r, alpha.i = q__1.i;
-    *c__ = c_abs(ca) / norm;
-    r_cnjg(&q__3, cb);
-    q__2.r = alpha.r * q__3.r - alpha.i * q__3.i, q__2.i = alpha.r * q__3.i + 
-           alpha.i * q__3.r;
-    q__1.r = q__2.r / norm, q__1.i = q__2.i / norm;
-    s->r = q__1.r, s->i = q__1.i;
-    q__1.r = norm * alpha.r, q__1.i = norm * alpha.i;
-    ca->r = q__1.r, ca->i = q__1.i;
-    return 0;
-} /* crotg_ */
-/* Subroutine */ int cscal_(integer *n, complex *ca, complex *cx, integer *
-       incx)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    complex q__1;
-    /* Local variables */
-    static integer i__, nincx;
-/*     scales a vector by a constant. */
-/*     jack dongarra, linpack,  3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --cx;
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       i__3 = i__;
-       i__4 = i__;
-       q__1.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__1.i = ca->r * cx[
-               i__4].i + ca->i * cx[i__4].r;
-       cx[i__3].r = q__1.r, cx[i__3].i = q__1.i;
-/* L10: */
-    }
-    return 0;
-/*        code for increment equal to 1 */
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       i__1 = i__;
-       i__3 = i__;
-       q__1.r = ca->r * cx[i__3].r - ca->i * cx[i__3].i, q__1.i = ca->r * cx[
-               i__3].i + ca->i * cx[i__3].r;
-       cx[i__1].r = q__1.r, cx[i__1].i = q__1.i;
-/* L30: */
-    }
-    return 0;
-} /* cscal_ */
-/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex *
-       cy, integer *incy, real *c__, real *s)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    complex q__1, q__2, q__3;
-    /* Local variables */
-    static integer i__, ix, iy;
-    static complex ctemp;
-/*     applies a plane rotation, where the cos and sin (c and s) are real */
-/*     and the vectors cx and cy are complex. */
-/*     jack dongarra, linpack, 3/11/78. */
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i;
-       i__3 = iy;
-       q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i;
-       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-       i__2 = iy;
-       i__3 = iy;
-       q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i;
-       i__4 = ix;
-       q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i;
-       q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i;
-       cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
-       i__2 = ix;
-       cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*       code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i;
-       i__3 = i__;
-       q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i;
-       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-       i__2 = i__;
-       i__3 = i__;
-       q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i;
-       i__4 = i__;
-       q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i;
-       q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i;
-       cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
-       i__2 = i__;
-       cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i;
-/* L30: */
-    }
-    return 0;
-} /* csrot_ */
-/* Subroutine */ int csscal_(integer *n, real *sa, complex *cx, integer *incx)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    real r__1, r__2;
-    complex q__1;
-    /* Builtin functions */
-    double r_imag(complex *);
-    /* Local variables */
-    static integer i__, nincx;
-/*     scales a complex vector by a real constant. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --cx;
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       i__3 = i__;
-       i__4 = i__;
-       r__1 = *sa * cx[i__4].r;
-       r__2 = *sa * r_imag(&cx[i__]);
-       q__1.r = r__1, q__1.i = r__2;
-       cx[i__3].r = q__1.r, cx[i__3].i = q__1.i;
-/* L10: */
-    }
-    return 0;
-/*        code for increment equal to 1 */
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       i__1 = i__;
-       i__3 = i__;
-       r__1 = *sa * cx[i__3].r;
-       r__2 = *sa * r_imag(&cx[i__]);
-       q__1.r = r__1, q__1.i = r__2;
-       cx[i__1].r = q__1.r, cx[i__1].i = q__1.i;
-/* L30: */
-    }
-    return 0;
-} /* csscal_ */
-/* Subroutine */ int cswap_(integer *n, complex *cx, integer *incx, complex *
-       cy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    /* Local variables */
-    static integer i__, ix, iy;
-    static complex ctemp;
-/*     interchanges two vectors. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i;
-       i__2 = ix;
-       i__3 = iy;
-       cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i;
-       i__2 = iy;
-       cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*       code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i;
-       i__2 = i__;
-       i__3 = i__;
-       cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i;
-       i__2 = i__;
-       cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i;
-/* L30: */
-    }
-    return 0;
-} /* cswap_ */
-/* Subroutine */ int csymm_(char *side, char *uplo, integer *m, integer *n, 
-       complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, 
-       complex *beta, complex *c__, integer *ldc, ftnlen side_len, ftnlen 
-       uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    complex q__1, q__2, q__3, q__4, q__5;
-    /* Local variables */
-    static integer i__, j, k, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CSYMM  performs one of the matrix-matrix operations */
-/*     C := alpha*A*B + beta*C, */
-/*  or */
-/*     C := alpha*B*A + beta*C, */
-/*  where  alpha and beta are scalars, A is a symmetric matrix and  B and */
-/*  C are m by n matrices. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  symmetric matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  symmetric  matrix   A  is  to  be */
-/*           referenced as follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is n  otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX          array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set NROWA as the number of rows of A. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-/*     Test the input parameters. */
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("CSYMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r 
-           == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (beta->r == 0.f && beta->i == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * b_dim1;
-                   q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                           q__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + j * c_dim1;
-                       i__6 = k + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, 
-                               q__2.i = temp1.r * a[i__6].i + temp1.i * a[
-                               i__6].r;
-                       q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                               q__2.i;
-                       c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-                       i__4 = k + j * b_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       q__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, q__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L50: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, 
-                               q__2.i = temp1.r * a[i__4].i + temp1.i * a[
-                               i__4].r;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       i__5 = i__ + i__ * a_dim1;
-                       q__4.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               q__4.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   i__2 = i__ + j * b_dim1;
-                   q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                           q__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       i__3 = k + j * c_dim1;
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               q__2.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       q__1.r = c__[i__4].r + q__2.r, q__1.i = c__[i__4].i + 
-                               q__2.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       i__3 = k + j * b_dim1;
-                       i__4 = k + i__ * a_dim1;
-                       q__2.r = b[i__3].r * a[i__4].r - b[i__3].i * a[i__4]
-                               .i, q__2.i = b[i__3].r * a[i__4].i + b[i__3]
-                               .i * a[i__4].r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L80: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__3].r - temp1.i * a[i__3].i, 
-                               q__2.i = temp1.r * a[i__3].i + temp1.i * a[
-                               i__3].r;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
-                   } else {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3]
-                               .i, q__3.i = beta->r * c__[i__3].i + beta->i *
-                                c__[i__3].r;
-                       i__4 = i__ + i__ * a_dim1;
-                       q__4.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, 
-                               q__4.i = temp1.r * a[i__4].i + temp1.i * a[
-                               i__4].r;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*B*A + beta*C. */
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = j + j * a_dim1;
-           q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2].i, q__1.i = 
-                   alpha->r * a[i__2].i + alpha->i * a[i__2].r;
-           temp1.r = q__1.r, temp1.i = q__1.i;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * b_dim1;
-                   q__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, 
-                           q__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4]
-                           .r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__2.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   i__5 = i__ + j * b_dim1;
-                   q__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, 
-                           q__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5]
-                           .r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = k + j * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               } else {
-                   i__3 = j + k * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   q__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           q__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                           q__2.i;
-                   c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = j + k * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               } else {
-                   i__3 = k + j * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   q__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           q__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                           q__2.i;
-                   c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-    return 0;
-/*     End of CSYMM . */
-} /* csymm_ */
-/* Subroutine */ int csyr2k_(char *uplo, char *trans, integer *n, integer *k, 
-       complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, 
-       complex *beta, complex *c__, integer *ldc, ftnlen uplo_len, ftnlen 
-       trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6, i__7;
-    complex q__1, q__2, q__3, q__4, q__5;
-    /* Local variables */
-    static integer i__, j, l, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CSYR2K  performs one of the symmetric rank 2k operations */
-/*     C := alpha*A*B' + alpha*B*A' + beta*C, */
-/*  or */
-/*     C := alpha*A'*B + alpha*B'*A + beta*C, */
-/*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix */
-/*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n */
-/*  matrices in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'    C := alpha*A*B' + alpha*B*A' + */
-/*                                         beta*C. */
-/*              TRANS = 'T' or 't'    C := alpha*A'*B + alpha*B'*A + */
-/*                                         beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'T' or 't',  K  specifies  the number of rows of the */
-/*           matrices  A and B.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("CSYR2K", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && (
-           beta->r == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (upper) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L90: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 
-                           0.f || b[i__4].i != 0.f)) {
-                       i__3 = j + l * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__3 = j + l * a_dim1;
-                       q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, q__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
-                                   .i + q__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           q__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, q__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + 
-                                   q__4.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L140: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 
-                           0.f || b[i__4].i != 0.f)) {
-                       i__3 = j + l * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__3 = j + l * a_dim1;
-                       q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, q__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
-                                   .i + q__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           q__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, q__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + 
-                                   q__4.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1.r = 0.f, temp1.i = 0.f;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__4 = l + i__ * b_dim1;
-                       i__5 = l + j * a_dim1;
-                       q__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, q__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L190: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               q__2.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               q__4.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1.r = 0.f, temp1.i = 0.f;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__4 = l + i__ * b_dim1;
-                       i__5 = l + j * a_dim1;
-                       q__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, q__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L220: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               q__2.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               q__4.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of CSYR2K. */
-} /* csyr2k_ */
-/* Subroutine */ int csyrk_(char *uplo, char *trans, integer *n, integer *k, 
-       complex *alpha, complex *a, integer *lda, complex *beta, complex *c__,
-        integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    complex q__1, q__2, q__3;
-    /* Local variables */
-    static integer i__, j, l, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CSYRK  performs one of the symmetric rank k operations */
-/*     C := alpha*A*A' + beta*C, */
-/*  or */
-/*     C := alpha*A'*A + beta*C, */
-/*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix */
-/*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix */
-/*  in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */
-/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'T' or 't',  K  specifies  the number of rows of the */
-/*           matrix A.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("CSYRK ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && (
-           beta->r == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (upper) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*A' + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L90: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                       i__3 = j + l * a_dim1;
-                       q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L140: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                       i__3 = j + l * a_dim1;
-                       q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*A'*A + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * a_dim1;
-                       q__2.r = a[i__4].r * a[i__5].r - a[i__4].i * a[i__5]
-                               .i, q__2.i = a[i__4].r * a[i__5].i + a[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L190: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * a_dim1;
-                       q__2.r = a[i__4].r * a[i__5].r - a[i__4].i * a[i__5]
-                               .i, q__2.i = a[i__4].r * a[i__5].i + a[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L220: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of CSYRK . */
-} /* csyrk_ */
-/* Subroutine */ int ctbmv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, complex *a, integer *lda, complex *x, integer *incx, 
-       ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CTBMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CTBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*         Form  x := A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__5 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__4 = j;
-                           i__2 = j;
-                           i__3 = kplus1 + j * a_dim1;
-                           q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, q__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__4 = jx;
-                   if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-                       i__4 = jx;
-                       temp.r = x[i__4].r, temp.i = x[i__4].i;
-                       ix = kx;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           i__4 = ix;
-                           i__2 = ix;
-                           i__5 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i + 
-                                   q__2.i;
-                           x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__3 = jx;
-                           i__4 = jx;
-                           i__2 = kplus1 + j * a_dim1;
-                           q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[
-                                   i__2].i, q__1.i = x[i__4].r * a[i__2].i + 
-                                   x[i__4].i * a[i__2].r;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                       }
-                   }
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__1 = *n, i__3 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                           i__1 = i__;
-                           i__3 = i__;
-                           i__2 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__2.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__4 = j;
-                           i__1 = j;
-                           i__3 = j * a_dim1 + 1;
-                           q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[
-                                   i__3].i, q__1.i = x[i__1].r * a[i__3].i + 
-                                   x[i__1].i * a[i__3].r;
-                           x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__4 = jx;
-                   if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-                       i__4 = jx;
-                       temp.r = x[i__4].r, temp.i = x[i__4].i;
-                       ix = kx;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__4 = *n, i__1 = j + *k;
-                       i__3 = j + 1;
-                       for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                           i__4 = ix;
-                           i__1 = ix;
-                           i__2 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__2.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i + 
-                                   q__2.i;
-                           x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__3 = jx;
-                           i__4 = jx;
-                           i__1 = j * a_dim1 + 1;
-                           q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[
-                                   i__1].i, q__1.i = x[i__4].r * a[i__1].i + 
-                                   x[i__4].i * a[i__1].r;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                       }
-                   }
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__3 = j;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   l = kplus1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__3 = kplus1 + j * a_dim1;
-                           q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   q__1.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__1 = i__;
-                           q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-                                   i__1].i, q__2.i = a[i__4].r * x[i__1].i + 
-                                   a[i__4].i * x[i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = i__;
-                           q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
-                                   q__2.i = q__3.r * x[i__4].i + q__3.i * x[
-                                   i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                       }
-                   }
-                   i__3 = j;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__3 = jx;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   kx -= *incx;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__3 = kplus1 + j * a_dim1;
-                           q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   q__1.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__1 = ix;
-                           q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-                                   i__1].i, q__2.i = a[i__4].r * x[i__1].i + 
-                                   a[i__4].i * x[i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = ix;
-                           q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
-                                   q__2.i = q__3.r * x[i__4].i + q__3.i * x[
-                                   i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L130: */
-                       }
-                   }
-                   i__3 = jx;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-                   jx -= *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   i__4 = j;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   l = 1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__4 = j * a_dim1 + 1;
-                           q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__1.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__2 = i__;
-                           q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = i__;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                       }
-                   }
-                   i__4 = j;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   i__4 = jx;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   kx += *incx;
-                   ix = kx;
-                   l = 1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__4 = j * a_dim1 + 1;
-                           q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__1.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__2 = ix;
-                           q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = ix;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L190: */
-                       }
-                   }
-                   i__4 = jx;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-                   jx += *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of CTBMV . */
-} /* ctbmv_ */
-/* Subroutine */ int ctbsv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, complex *a, integer *lda, complex *x, integer *incx, 
-       ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CTBSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular band matrix, with ( k + 1 ) */
-/*  diagonals. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CTBSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed by sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       l = kplus1 - j;
-                       if (nounit) {
-                           i__1 = j;
-                           c_div(&q__1, &x[j], &a[kplus1 + j * a_dim1]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           q__1.r = x[i__3].r - q__2.r, q__1.i = x[i__3].i - 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   kx -= *incx;
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       ix = kx;
-                       l = kplus1 - j;
-                       if (nounit) {
-                           i__1 = jx;
-                           c_div(&q__1, &x[jx], &a[kplus1 + j * a_dim1]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           q__1.r = x[i__3].r - q__2.r, q__1.i = x[i__3].i - 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                           ix -= *incx;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       l = 1 - j;
-                       if (nounit) {
-                           i__2 = j;
-                           c_div(&q__1, &x[j], &a[j * a_dim1 + 1]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   kx += *incx;
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       ix = kx;
-                       l = 1 - j;
-                       if (nounit) {
-                           i__2 = jx;
-                           c_div(&q__1, &x[jx], &a[j * a_dim1 + 1]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ix += *incx;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A') )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   l = kplus1 - j;
-                   if (noconj) {
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           i__2 = l + i__ + j * a_dim1;
-                           i__3 = i__;
-                           q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, q__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[kplus1 + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = i__;
-                           q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
-                                   q__2.i = q__3.r * x[i__4].i + q__3.i * x[
-                                   i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__3 = j;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__3 = jx;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (noconj) {
-/* Computing MAX */
-                       i__3 = 1, i__4 = j - *k;
-                       i__2 = j - 1;
-                       for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                           i__3 = l + i__ + j * a_dim1;
-                           i__4 = ix;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[kplus1 + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__2 = ix;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__4 = jx;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   l = 1 - j;
-                   if (noconj) {
-/* Computing MIN */
-                       i__1 = *n, i__4 = j + *k;
-                       i__2 = j + 1;
-                       for (i__ = min(i__1,i__4); i__ >= i__2; --i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__4 = i__;
-                           q__2.r = a[i__1].r * x[i__4].r - a[i__1].i * x[
-                                   i__4].i, q__2.i = a[i__1].r * x[i__4].i + 
-                                   a[i__1].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j * a_dim1 + 1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-/* Computing MIN */
-                       i__2 = *n, i__1 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__2,i__1); i__ >= i__4; --i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__2 = i__;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__4 = j;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__4 = jx;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   ix = kx;
-                   l = 1 - j;
-                   if (noconj) {
-/* Computing MIN */
-                       i__4 = *n, i__2 = j + *k;
-                       i__1 = j + 1;
-                       for (i__ = min(i__4,i__2); i__ >= i__1; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__2 = ix;
-                           q__2.r = a[i__4].r * x[i__2].r - a[i__4].i * x[
-                                   i__2].i, q__2.i = a[i__4].r * x[i__2].i + 
-                                   a[i__4].i * x[i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j * a_dim1 + 1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-/* Computing MIN */
-                       i__1 = *n, i__4 = j + *k;
-                       i__2 = j + 1;
-                       for (i__ = min(i__1,i__4); i__ >= i__2; --i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = ix;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of CTBSV . */
-} /* ctbsv_ */
-/* Subroutine */ int ctpmv_(char *uplo, char *trans, char *diag, integer *n, 
-       complex *ap, complex *x, integer *incx, ftnlen uplo_len, ftnlen 
-       trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CTPMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("CTPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x:= A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       k = kk;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = k;
-                           q__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, q__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ++k;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__2 = j;
-                           i__3 = j;
-                           i__4 = kk + j - 1;
-                           q__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[
-                                   i__4].i, q__1.i = x[i__3].r * ap[i__4].i 
-                                   + x[i__3].i * ap[i__4].r;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                   }
-                   kk += j;
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = kx;
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = k;
-                           q__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, q__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__2 = jx;
-                           i__3 = jx;
-                           i__4 = kk + j - 1;
-                           q__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[
-                                   i__4].i, q__1.i = x[i__3].r * ap[i__4].i 
-                                   + x[i__3].i * ap[i__4].r;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                   }
-                   jx += *incx;
-                   kk += j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       k = kk;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = k;
-                           q__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, q__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                           --k;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__1 = j;
-                           i__2 = j;
-                           i__3 = kk - *n + j;
-                           q__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[
-                                   i__3].i, q__1.i = x[i__2].r * ap[i__3].i 
-                                   + x[i__2].i * ap[i__3].r;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                   }
-                   kk -= *n - j + 1;
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = kx;
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = k;
-                           q__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, q__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__1 = jx;
-                           i__2 = jx;
-                           i__3 = kk - *n + j;
-                           q__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[
-                                   i__3].i, q__1.i = x[i__2].r * ap[i__3].i 
-                                   + x[i__2].i * ap[i__3].r;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   k = kk - 1;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = kk;
-                           q__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1]
-                                   .i, q__1.i = temp.r * ap[i__1].i + temp.i 
-                                   * ap[i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = k;
-                           i__2 = i__;
-                           q__2.r = ap[i__1].r * x[i__2].r - ap[i__1].i * x[
-                                   i__2].i, q__2.i = ap[i__1].r * x[i__2].i 
-                                   + ap[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           --k;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__1 = i__;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           --k;
-/* L100: */
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   kk -= j;
-/* L110: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = kk;
-                           q__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1]
-                                   .i, q__1.i = temp.r * ap[i__1].i + temp.i 
-                                   * ap[i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           i__2 = k;
-                           i__3 = ix;
-                           q__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, q__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           r_cnjg(&q__3, &ap[k]);
-                           i__2 = ix;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L130: */
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-                   kk -= j;
-/* L140: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   k = kk + 1;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = kk;
-                           q__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2]
-                                   .i, q__1.i = temp.r * ap[i__2].i + temp.i 
-                                   * ap[i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = k;
-                           i__4 = i__;
-                           q__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, q__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ++k;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__3 = i__;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ++k;
-/* L160: */
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   kk += *n - j + 1;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = kk;
-                           q__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2]
-                                   .i, q__1.i = temp.r * ap[i__2].i + temp.i 
-                                   * ap[i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           i__3 = k;
-                           i__4 = ix;
-                           q__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, q__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           r_cnjg(&q__3, &ap[k]);
-                           i__3 = ix;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of CTPMV . */
-} /* ctpmv_ */
-/* Subroutine */ int ctpsv_(char *uplo, char *trans, char *diag, integer *n, 
-       complex *ap, complex *x, integer *incx, ftnlen uplo_len, ftnlen 
-       trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CTPSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix, supplied in packed form. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("CTPSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       if (nounit) {
-                           i__1 = j;
-                           c_div(&q__1, &x[j], &ap[kk]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       k = kk - 1;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__;
-                           i__2 = i__;
-                           i__3 = k;
-                           q__2.r = temp.r * ap[i__3].r - temp.i * ap[i__3]
-                                   .i, q__2.i = temp.r * ap[i__3].i + temp.i 
-                                   * ap[i__3].r;
-                           q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - 
-                                   q__2.i;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                           --k;
-/* L10: */
-                       }
-                   }
-                   kk -= j;
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       if (nounit) {
-                           i__1 = jx;
-                           c_div(&q__1, &x[jx], &ap[kk]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = jx;
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = k;
-                           q__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, q__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           q__1.r = x[i__3].r - q__2.r, q__1.i = x[i__3].i - 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       if (nounit) {
-                           i__2 = j;
-                           c_div(&q__1, &x[j], &ap[kk]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       k = kk + 1;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = k;
-                           q__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, q__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ++k;
-/* L50: */
-                       }
-                   }
-                   kk += *n - j + 1;
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       if (nounit) {
-                           i__2 = jx;
-                           c_div(&q__1, &x[jx], &ap[kk]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = jx;
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = k;
-                           q__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, q__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   k = kk;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = k;
-                           i__4 = i__;
-                           q__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, q__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ++k;
-/* L90: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &ap[kk + j - 1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__3 = i__;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ++k;
-/* L100: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk + j - 1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   kk += j;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = kx;
-                   if (noconj) {
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           i__3 = k;
-                           i__4 = ix;
-                           q__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, q__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &ap[kk + j - 1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__3 = ix;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk + j - 1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-                   kk += j;
-/* L140: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   k = kk;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = k;
-                           i__3 = i__;
-                           q__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, q__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           --k;
-/* L150: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &ap[kk - *n + j]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__2 = i__;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           --k;
-/* L160: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk - *n + j]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   kk -= *n - j + 1;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = kx;
-                   if (noconj) {
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           i__2 = k;
-                           i__3 = ix;
-                           q__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, q__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &ap[kk - *n + j]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__2 = ix;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk - *n + j]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of CTPSV . */
-} /* ctpsv_ */
-/* Subroutine */ int ctrmm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, complex *alpha, complex *a, integer *lda, 
-       complex *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, ftnlen 
-       transa_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, k, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static logical lside;
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CTRMM  performs one of the matrix-matrix operations */
-/*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ) */
-/*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-/*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ). */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE specifies whether  op( A ) multiplies B from */
-/*           the left or right as follows: */
-/*              SIDE = 'L' or 'l'   B := alpha*op( A )*B. */
-/*              SIDE = 'R' or 'r'   B := alpha*B*op( A ). */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-/*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ). */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX          array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain the matrix  B,  and  on exit  is overwritten  by the */
-/*           transformed matrix. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    noconj = lsame_(transa, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("CTRMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               i__3 = i__ + j * b_dim1;
-               b[i__3].r = 0.f, b[i__3].i = 0.f;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*A*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * b_dim1;
-                       if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                           i__3 = k + j * b_dim1;
-                           q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, q__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           i__3 = k - 1;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * a_dim1;
-                               q__2.r = temp.r * a[i__6].r - temp.i * a[i__6]
-                                       .i, q__2.i = temp.r * a[i__6].i + 
-                                       temp.i * a[i__6].r;
-                               q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5]
-                                       .i + q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L30: */
-                           }
-                           if (nounit) {
-                               i__3 = k + k * a_dim1;
-                               q__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
-                                       .i, q__1.i = temp.r * a[i__3].i + 
-                                       temp.i * a[i__3].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = k + j * b_dim1;
-                           b[i__3].r = temp.r, b[i__3].i = temp.i;
-                       }
-/* L40: */
-                   }
-/* L50: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (k = *m; k >= 1; --k) {
-                       i__2 = k + j * b_dim1;
-                       if (b[i__2].r != 0.f || b[i__2].i != 0.f) {
-                           i__2 = k + j * b_dim1;
-                           q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2]
-                                   .i, q__1.i = alpha->r * b[i__2].i + 
-                                   alpha->i * b[i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           i__2 = k + j * b_dim1;
-                           b[i__2].r = temp.r, b[i__2].i = temp.i;
-                           if (nounit) {
-                               i__2 = k + j * b_dim1;
-                               i__3 = k + j * b_dim1;
-                               i__4 = k + k * a_dim1;
-                               q__1.r = b[i__3].r * a[i__4].r - b[i__3].i * 
-                                       a[i__4].i, q__1.i = b[i__3].r * a[
-                                       i__4].i + b[i__3].i * a[i__4].r;
-                               b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = k + 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * a_dim1;
-                               q__2.r = temp.r * a[i__5].r - temp.i * a[i__5]
-                                       .i, q__2.i = temp.r * a[i__5].i + 
-                                       temp.i * a[i__5].r;
-                               q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4]
-                                       .i + q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L60: */
-                           }
-                       }
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       i__2 = i__ + j * b_dim1;
-                       temp.r = b[i__2].r, temp.i = b[i__2].i;
-                       if (noconj) {
-                           if (nounit) {
-                               i__2 = i__ + i__ * a_dim1;
-                               q__1.r = temp.r * a[i__2].r - temp.i * a[i__2]
-                                       .i, q__1.i = temp.r * a[i__2].i + 
-                                       temp.i * a[i__2].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__2 = i__ - 1;
-                           for (k = 1; k <= i__2; ++k) {
-                               i__3 = k + i__ * a_dim1;
-                               i__4 = k + j * b_dim1;
-                               q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * 
-                                       b[i__4].i, q__2.i = a[i__3].r * b[
-                                       i__4].i + a[i__3].i * b[i__4].r;
-                               q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                           }
-                       } else {
-                           if (nounit) {
-                               r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
-                               q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                       q__1.i = temp.r * q__2.i + temp.i * 
-                                       q__2.r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__2 = i__ - 1;
-                           for (k = 1; k <= i__2; ++k) {
-                               r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                               i__3 = k + j * b_dim1;
-                               q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3]
-                                       .i, q__2.i = q__3.r * b[i__3].i + 
-                                       q__3.i * b[i__3].r;
-                               q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                           }
-                       }
-                       i__2 = i__ + j * b_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L110: */
-                   }
-/* L120: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       temp.r = b[i__3].r, temp.i = b[i__3].i;
-                       if (noconj) {
-                           if (nounit) {
-                               i__3 = i__ + i__ * a_dim1;
-                               q__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
-                                       .i, q__1.i = temp.r * a[i__3].i + 
-                                       temp.i * a[i__3].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (k = i__ + 1; k <= i__3; ++k) {
-                               i__4 = k + i__ * a_dim1;
-                               i__5 = k + j * b_dim1;
-                               q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * 
-                                       b[i__5].i, q__2.i = a[i__4].r * b[
-                                       i__5].i + a[i__4].i * b[i__5].r;
-                               q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L130: */
-                           }
-                       } else {
-                           if (nounit) {
-                               r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
-                               q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                       q__1.i = temp.r * q__2.i + temp.i * 
-                                       q__2.r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (k = i__ + 1; k <= i__3; ++k) {
-                               r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                               i__4 = k + j * b_dim1;
-                               q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4]
-                                       .i, q__2.i = q__3.r * b[i__4].i + 
-                                       q__3.i * b[i__4].r;
-                               q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L140: */
-                           }
-                       }
-                       i__3 = i__ + j * b_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L150: */
-                   }
-/* L160: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*B*A. */
-           if (upper) {
-               for (j = *n; j >= 1; --j) {
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       i__1 = j + j * a_dim1;
-                       q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                               q__1.i = temp.r * a[i__1].i + temp.i * a[i__1]
-                               .r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                   }
-                   i__1 = *m;
-                   for (i__ = 1; i__ <= i__1; ++i__) {
-                       i__2 = i__ + j * b_dim1;
-                       i__3 = i__ + j * b_dim1;
-                       q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                               q__1.i = temp.r * b[i__3].i + temp.i * b[i__3]
-                               .r;
-                       b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L170: */
-                   }
-                   i__1 = j - 1;
-                   for (k = 1; k <= i__1; ++k) {
-                       i__2 = k + j * a_dim1;
-                       if (a[i__2].r != 0.f || a[i__2].i != 0.f) {
-                           i__2 = k + j * a_dim1;
-                           q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2]
-                                   .i, q__1.i = alpha->r * a[i__2].i + 
-                                   alpha->i * a[i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, q__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4]
-                                       .i + q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L180: */
-                           }
-                       }
-/* L190: */
-                   }
-/* L200: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       i__2 = j + j * a_dim1;
-                       q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                               q__1.i = temp.r * a[i__2].i + temp.i * a[i__2]
-                               .r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                   }
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       i__4 = i__ + j * b_dim1;
-                       q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                               q__1.i = temp.r * b[i__4].i + temp.i * b[i__4]
-                               .r;
-                       b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L210: */
-                   }
-                   i__2 = *n;
-                   for (k = j + 1; k <= i__2; ++k) {
-                       i__3 = k + j * a_dim1;
-                       if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                           i__3 = k + j * a_dim1;
-                           q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3]
-                                   .i, q__1.i = alpha->r * a[i__3].i + 
-                                   alpha->i * a[i__3].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, q__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5]
-                                       .i + q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L220: */
-                           }
-                       }
-/* L230: */
-                   }
-/* L240: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ). */
-           if (upper) {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   i__2 = k - 1;
-                   for (j = 1; j <= i__2; ++j) {
-                       i__3 = j + k * a_dim1;
-                       if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                           if (noconj) {
-                               i__3 = j + k * a_dim1;
-                               q__1.r = alpha->r * a[i__3].r - alpha->i * a[
-                                       i__3].i, q__1.i = alpha->r * a[i__3]
-                                       .i + alpha->i * a[i__3].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           } else {
-                               r_cnjg(&q__2, &a[j + k * a_dim1]);
-                               q__1.r = alpha->r * q__2.r - alpha->i * 
-                                       q__2.i, q__1.i = alpha->r * q__2.i + 
-                                       alpha->i * q__2.r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, q__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5]
-                                       .i + q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L250: */
-                           }
-                       }
-/* L260: */
-                   }
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       if (noconj) {
-                           i__2 = k + k * a_dim1;
-                           q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       } else {
-                           r_cnjg(&q__2, &a[k + k * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   if (temp.r != 1.f || temp.i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   q__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L270: */
-                       }
-                   }
-/* L280: */
-               }
-           } else {
-               for (k = *n; k >= 1; --k) {
-                   i__1 = *n;
-                   for (j = k + 1; j <= i__1; ++j) {
-                       i__2 = j + k * a_dim1;
-                       if (a[i__2].r != 0.f || a[i__2].i != 0.f) {
-                           if (noconj) {
-                               i__2 = j + k * a_dim1;
-                               q__1.r = alpha->r * a[i__2].r - alpha->i * a[
-                                       i__2].i, q__1.i = alpha->r * a[i__2]
-                                       .i + alpha->i * a[i__2].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           } else {
-                               r_cnjg(&q__2, &a[j + k * a_dim1]);
-                               q__1.r = alpha->r * q__2.r - alpha->i * 
-                                       q__2.i, q__1.i = alpha->r * q__2.i + 
-                                       alpha->i * q__2.r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, q__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4]
-                                       .i + q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L290: */
-                           }
-                       }
-/* L300: */
-                   }
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       if (noconj) {
-                           i__1 = k + k * a_dim1;
-                           q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   q__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       } else {
-                           r_cnjg(&q__2, &a[k + k * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   if (temp.r != 1.f || temp.i != 0.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   q__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L310: */
-                       }
-                   }
-/* L320: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of CTRMM . */
-} /* ctrmm_ */
-/* Subroutine */ int ctrmv_(char *uplo, char *trans, char *diag, integer *n, 
-       complex *a, integer *lda, complex *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CTRMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           transformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("CTRMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__2 = j;
-                           i__3 = j;
-                           i__4 = j + j * a_dim1;
-                           q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
-                                   i__4].i, q__1.i = x[i__3].r * a[i__4].i + 
-                                   x[i__3].i * a[i__4].r;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = kx;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__2 = jx;
-                           i__3 = jx;
-                           i__4 = j + j * a_dim1;
-                           q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
-                                   i__4].i, q__1.i = x[i__3].r * a[i__4].i + 
-                                   x[i__3].i * a[i__4].r;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                   }
-                   jx += *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__1 = j;
-                           i__2 = j;
-                           i__3 = j + j * a_dim1;
-                           q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, q__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = kx;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__1 = jx;
-                           i__2 = jx;
-                           i__3 = j + j * a_dim1;
-                           q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, q__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                   }
-                   jx -= *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = j + j * a_dim1;
-                           q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   q__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__ + j * a_dim1;
-                           i__2 = i__;
-                           q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__1 = i__;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = j + j * a_dim1;
-                           q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   q__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           i__1 = i__ + j * a_dim1;
-                           i__2 = ix;
-                           q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__1 = ix;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L130: */
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = j + j * a_dim1;
-                           q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = i__;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__3 = i__;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = j + j * a_dim1;
-                           q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = ix;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__3 = ix;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of CTRMV . */
-} /* ctrmv_ */
-/* Subroutine */ int ctrsm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, complex *alpha, complex *a, integer *lda, 
-       complex *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, ftnlen 
-       transa_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6, i__7;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, k, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static logical lside;
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CTRSM  solves one of the matrix equations */
-/*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B, */
-/*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-/*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ). */
-/*  The matrix X is overwritten on B. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry, SIDE specifies whether op( A ) appears on the left */
-/*           or right of X as follows: */
-/*              SIDE = 'L' or 'l'   op( A )*X = alpha*B. */
-/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B. */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-/*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ). */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX         . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX          array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain  the  right-hand  side  matrix  B,  and  on exit  is */
-/*           overwritten by the solution matrix  X. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    noconj = lsame_(transa, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("CTRSM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-/*     And when  alpha.eq.zero. */
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               i__3 = i__ + j * b_dim1;
-               b[i__3].r = 0.f, b[i__3].i = 0.f;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*inv( A )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, q__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L30: */
-                       }
-                   }
-                   for (k = *m; k >= 1; --k) {
-                       i__2 = k + j * b_dim1;
-                       if (b[i__2].r != 0.f || b[i__2].i != 0.f) {
-                           if (nounit) {
-                               i__2 = k + j * b_dim1;
-                               c_div(&q__1, &b[k + j * b_dim1], &a[k + k * 
-                                       a_dim1]);
-                               b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-                           }
-                           i__2 = k - 1;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = k + j * b_dim1;
-                               i__6 = i__ + k * a_dim1;
-                               q__2.r = b[i__5].r * a[i__6].r - b[i__5].i * 
-                                       a[i__6].i, q__2.i = b[i__5].r * a[
-                                       i__6].i + b[i__5].i * a[i__6].r;
-                               q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4]
-                                       .i - q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L40: */
-                           }
-                       }
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, q__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L70: */
-                       }
-                   }
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * b_dim1;
-                       if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                           if (nounit) {
-                               i__3 = k + j * b_dim1;
-                               c_div(&q__1, &b[k + j * b_dim1], &a[k + k * 
-                                       a_dim1]);
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = k + 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = k + j * b_dim1;
-                               i__7 = i__ + k * a_dim1;
-                               q__2.r = b[i__6].r * a[i__7].r - b[i__6].i * 
-                                       a[i__7].i, q__2.i = b[i__6].r * a[
-                                       i__7].i + b[i__6].i * a[i__7].r;
-                               q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5]
-                                       .i - q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L80: */
-                           }
-                       }
-/* L90: */
-                   }
-/* L100: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*inv( A' )*B */
-/*           or    B := alpha*inv( conjg( A' ) )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       if (noconj) {
-                           i__3 = i__ - 1;
-                           for (k = 1; k <= i__3; ++k) {
-                               i__4 = k + i__ * a_dim1;
-                               i__5 = k + j * b_dim1;
-                               q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * 
-                                       b[i__5].i, q__2.i = a[i__4].r * b[
-                                       i__5].i + a[i__4].i * b[i__5].r;
-                               q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L110: */
-                           }
-                           if (nounit) {
-                               c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                       } else {
-                           i__3 = i__ - 1;
-                           for (k = 1; k <= i__3; ++k) {
-                               r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                               i__4 = k + j * b_dim1;
-                               q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4]
-                                       .i, q__2.i = q__3.r * b[i__4].i + 
-                                       q__3.i * b[i__4].r;
-                               q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L120: */
-                           }
-                           if (nounit) {
-                               r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
-                               c_div(&q__1, &temp, &q__2);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                       }
-                       i__3 = i__ + j * b_dim1;
-                       b[i__3].r = temp.r, b[i__3].i = temp.i;
-/* L130: */
-                   }
-/* L140: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       i__2 = i__ + j * b_dim1;
-                       q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                               q__1.i = alpha->r * b[i__2].i + alpha->i * b[
-                               i__2].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       if (noconj) {
-                           i__2 = *m;
-                           for (k = i__ + 1; k <= i__2; ++k) {
-                               i__3 = k + i__ * a_dim1;
-                               i__4 = k + j * b_dim1;
-                               q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * 
-                                       b[i__4].i, q__2.i = a[i__3].r * b[
-                                       i__4].i + a[i__3].i * b[i__4].r;
-                               q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                           }
-                           if (nounit) {
-                               c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                       } else {
-                           i__2 = *m;
-                           for (k = i__ + 1; k <= i__2; ++k) {
-                               r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                               i__3 = k + j * b_dim1;
-                               q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3]
-                                       .i, q__2.i = q__3.r * b[i__3].i + 
-                                       q__3.i * b[i__3].r;
-                               q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                           }
-                           if (nounit) {
-                               r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
-                               c_div(&q__1, &temp, &q__2);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                       }
-                       i__2 = i__ + j * b_dim1;
-                       b[i__2].r = temp.r, b[i__2].i = temp.i;
-/* L170: */
-                   }
-/* L180: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*B*inv( A ). */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, q__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = j - 1;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * a_dim1;
-                       if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = k + j * a_dim1;
-                               i__7 = i__ + k * b_dim1;
-                               q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * 
-                                       b[i__7].i, q__2.i = a[i__6].r * b[
-                                       i__7].i + a[i__6].i * b[i__7].r;
-                               q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5]
-                                       .i - q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L200: */
-                           }
-                       }
-/* L210: */
-                   }
-                   if (nounit) {
-                       c_div(&q__1, &c_b21, &a[j + j * a_dim1]);
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   q__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L220: */
-                       }
-                   }
-/* L230: */
-               }
-           } else {
-               for (j = *n; j >= 1; --j) {
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + j * b_dim1;
-                           i__3 = i__ + j * b_dim1;
-                           q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, q__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L240: */
-                       }
-                   }
-                   i__1 = *n;
-                   for (k = j + 1; k <= i__1; ++k) {
-                       i__2 = k + j * a_dim1;
-                       if (a[i__2].r != 0.f || a[i__2].i != 0.f) {
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = k + j * a_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               q__2.r = a[i__5].r * b[i__6].r - a[i__5].i * 
-                                       b[i__6].i, q__2.i = a[i__5].r * b[
-                                       i__6].i + a[i__5].i * b[i__6].r;
-                               q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4]
-                                       .i - q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L250: */
-                           }
-                       }
-/* L260: */
-                   }
-                   if (nounit) {
-                       c_div(&q__1, &c_b21, &a[j + j * a_dim1]);
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + j * b_dim1;
-                           i__3 = i__ + j * b_dim1;
-                           q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   q__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L270: */
-                       }
-                   }
-/* L280: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*B*inv( A' ) */
-/*           or    B := alpha*B*inv( conjg( A' ) ). */
-           if (upper) {
-               for (k = *n; k >= 1; --k) {
-                   if (nounit) {
-                       if (noconj) {
-                           c_div(&q__1, &c_b21, &a[k + k * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       } else {
-                           r_cnjg(&q__2, &a[k + k * a_dim1]);
-                           c_div(&q__1, &c_b21, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   q__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L290: */
-                       }
-                   }
-                   i__1 = k - 1;
-                   for (j = 1; j <= i__1; ++j) {
-                       i__2 = j + k * a_dim1;
-                       if (a[i__2].r != 0.f || a[i__2].i != 0.f) {
-                           if (noconj) {
-                               i__2 = j + k * a_dim1;
-                               temp.r = a[i__2].r, temp.i = a[i__2].i;
-                           } else {
-                               r_cnjg(&q__1, &a[j + k * a_dim1]);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, q__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4]
-                                       .i - q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L300: */
-                           }
-                       }
-/* L310: */
-                   }
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, q__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L320: */
-                       }
-                   }
-/* L330: */
-               }
-           } else {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   if (nounit) {
-                       if (noconj) {
-                           c_div(&q__1, &c_b21, &a[k + k * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       } else {
-                           r_cnjg(&q__2, &a[k + k * a_dim1]);
-                           c_div(&q__1, &c_b21, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   q__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L340: */
-                       }
-                   }
-                   i__2 = *n;
-                   for (j = k + 1; j <= i__2; ++j) {
-                       i__3 = j + k * a_dim1;
-                       if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                           if (noconj) {
-                               i__3 = j + k * a_dim1;
-                               temp.r = a[i__3].r, temp.i = a[i__3].i;
-                           } else {
-                               r_cnjg(&q__1, &a[j + k * a_dim1]);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, q__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5]
-                                       .i - q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L350: */
-                           }
-                       }
-/* L360: */
-                   }
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, q__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L370: */
-                       }
-                   }
-/* L380: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of CTRSM . */
-} /* ctrsm_ */
-/* Subroutine */ int ctrsv_(char *uplo, char *trans, char *diag, integer *n, 
-       complex *a, integer *lda, complex *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-    /* Builtin functions */
-    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  CTRSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("CTRSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       if (nounit) {
-                           i__1 = j;
-                           c_div(&q__1, &x[j], &a[j + j * a_dim1]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__;
-                           i__2 = i__;
-                           i__3 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   q__2.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - 
-                                   q__2.i;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       if (nounit) {
-                           i__1 = jx;
-                           c_div(&q__1, &x[jx], &a[j + j * a_dim1]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = jx;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           i__1 = ix;
-                           i__2 = ix;
-                           i__3 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   q__2.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - 
-                                   q__2.i;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       if (nounit) {
-                           i__2 = j;
-                           c_div(&q__1, &x[j], &a[j + j * a_dim1]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       if (nounit) {
-                           i__2 = jx;
-                           c_div(&q__1, &x[jx], &a[j + j * a_dim1]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = jx;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = i__;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__3 = i__;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   ix = kx;
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = ix;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__3 = ix;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__ + j * a_dim1;
-                           i__3 = i__;
-                           q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, q__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__2 = i__;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   ix = kx;
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__ + j * a_dim1;
-                           i__3 = ix;
-                           q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, q__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__2 = ix;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of CTRSV . */
-} /* ctrsv_ */
-doublereal dasum_(integer *n, doublereal *dx, integer *incx)
-    /* System generated locals */
-    integer i__1, i__2;
-    doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6;
-    /* Local variables */
-    static integer i__, m, mp1;
-    static doublereal dtemp;
-    static integer nincx;
-/*     takes the sum of the absolute values. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --dx;
-    /* Function Body */
-    ret_val = 0.;
-    dtemp = 0.;
-    if (*n <= 0 || *incx <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       dtemp += (d__1 = dx[i__], abs(d__1));
-/* L10: */
-    }
-    ret_val = dtemp;
-    return ret_val;
-/*        code for increment equal to 1 */
-/*        clean-up loop */
-    m = *n % 6;
-    if (m == 0) {
-       goto L40;
-    }
-    i__2 = m;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       dtemp += (d__1 = dx[i__], abs(d__1));
-/* L30: */
-    }
-    if (*n < 6) {
-       goto L60;
-    }
-    mp1 = m + 1;
-    i__2 = *n;
-    for (i__ = mp1; i__ <= i__2; i__ += 6) {
-       dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], 
-               abs(d__2)) + (d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = dx[i__ 
-               + 3], abs(d__4)) + (d__5 = dx[i__ + 4], abs(d__5)) + (d__6 = 
-               dx[i__ + 5], abs(d__6));
-/* L50: */
-    }
-    ret_val = dtemp;
-    return ret_val;
-} /* dasum_ */
-/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, 
-       integer *incx, doublereal *dy, integer *incy)
-    /* System generated locals */
-    integer i__1;
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-/*     constant times a vector plus a vector. */
-/*     uses unrolled loops for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*da == 0.) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dy[iy] += *da * dx[ix];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*        code for both increments equal to 1 */
-/*        clean-up loop */
-    m = *n % 4;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dy[i__] += *da * dx[i__];
-/* L30: */
-    }
-    if (*n < 4) {
-       return 0;
-    }
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 4) {
-       dy[i__] += *da * dx[i__];
-       dy[i__ + 1] += *da * dx[i__ + 1];
-       dy[i__ + 2] += *da * dx[i__ + 2];
-       dy[i__ + 3] += *da * dx[i__ + 3];
-/* L50: */
-    }
-    return 0;
-} /* daxpy_ */
-doublereal dcabs1_(doublecomplex *z__)
-    /* System generated locals */
-    doublereal ret_val;
-    static doublecomplex equiv_0[1];
-    /* Local variables */
-#define t ((doublereal *)equiv_0)
-#define zz (equiv_0)
-    zz->r = z__->r, zz->i = z__->i;
-    ret_val = abs(t[0]) + abs(t[1]);
-    return ret_val;
-} /* dcabs1_ */
-#undef zz
-#undef t
-/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, 
-       doublereal *dy, integer *incy)
-    /* System generated locals */
-    integer i__1;
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-/*     copies a vector, x, to a vector, y. */
-/*     uses unrolled loops for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dy[iy] = dx[ix];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*        code for both increments equal to 1 */
-/*        clean-up loop */
-    m = *n % 7;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dy[i__] = dx[i__];
-/* L30: */
-    }
-    if (*n < 7) {
-       return 0;
-    }
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 7) {
-       dy[i__] = dx[i__];
-       dy[i__ + 1] = dx[i__ + 1];
-       dy[i__ + 2] = dx[i__ + 2];
-       dy[i__ + 3] = dx[i__ + 3];
-       dy[i__ + 4] = dx[i__ + 4];
-       dy[i__ + 5] = dx[i__ + 5];
-       dy[i__ + 6] = dx[i__ + 6];
-/* L50: */
-    }
-    return 0;
-} /* dcopy_ */
-doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, 
-       integer *incy)
-    /* System generated locals */
-    integer i__1;
-    doublereal ret_val;
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-    static doublereal dtemp;
-/*     forms the dot product of two vectors. */
-/*     uses unrolled loops for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-    /* Function Body */
-    ret_val = 0.;
-    dtemp = 0.;
-    if (*n <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp += dx[ix] * dy[iy];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    ret_val = dtemp;
-    return ret_val;
-/*        code for both increments equal to 1 */
-/*        clean-up loop */
-    m = *n % 5;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp += dx[i__] * dy[i__];
-/* L30: */
-    }
-    if (*n < 5) {
-       goto L60;
-    }
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 5) {
-       dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[
-               i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + 
-               4] * dy[i__ + 4];
-/* L50: */
-    }
-    ret_val = dtemp;
-    return ret_val;
-} /* ddot_ */
-/* Subroutine */ int dgbmv_(char *trans, integer *m, integer *n, integer *kl, 
-       integer *ku, doublereal *alpha, doublereal *a, integer *lda, 
-       doublereal *x, integer *incx, doublereal *beta, doublereal *y, 
-       integer *incy, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    /* Local variables */
-    static integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info;
-    static doublereal temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DGBMV  performs one of the matrix-vector operations */
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n band matrix, with kl sub-diagonals and ku super-diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  KL     - INTEGER. */
-/*           On entry, KL specifies the number of sub-diagonals of the */
-/*           matrix A. KL must satisfy  0 .le. KL. */
-/*           Unchanged on exit. */
-/*  KU     - INTEGER. */
-/*           On entry, KU specifies the number of super-diagonals of the */
-/*           matrix A. KU must satisfy  0 .le. KU. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading ( kl + ku + 1 ) by n part of the */
-/*           array A must contain the matrix of coefficients, supplied */
-/*           column by column, with the leading diagonal of the matrix in */
-/*           row ( ku + 1 ) of the array, the first super-diagonal */
-/*           starting at position 2 in row ku, the first sub-diagonal */
-/*           starting at position 1 in row ( ku + 2 ), and so on. */
-/*           Elements in the array A that do not correspond to elements */
-/*           in the band matrix (such as the top left ku by ku triangle) */
-/*           are not referenced. */
-/*           The following program segment will transfer a band matrix */
-/*           from conventional full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    K = KU + 1 - J */
-/*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */
-/*                       A( K + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( kl + ku + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*kl < 0) {
-       info = 4;
-    } else if (*ku < 0) {
-       info = 5;
-    } else if (*lda < *kl + *ku + 1) {
-       info = 8;
-    } else if (*incx == 0) {
-       info = 10;
-    } else if (*incy == 0) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("DGBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the band part of A. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    kup1 = *ku + 1;
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       y[i__] += temp * a[k + i__ + j * a_dim1];
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   iy = ky;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       y[iy] += temp * a[k + i__ + j * a_dim1];
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               if (j > *ku) {
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.;
-               k = kup1 - j;
-/* Computing MAX */
-               i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-               i__5 = *m, i__6 = j + *kl;
-               i__2 = min(i__5,i__6);
-               for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                   temp += a[k + i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L100: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.;
-               ix = kx;
-               k = kup1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-               i__5 = *m, i__6 = j + *kl;
-               i__4 = min(i__5,i__6);
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   temp += a[k + i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-/* L110: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-               if (j > *ku) {
-                   kx += *incx;
-               }
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of DGBMV . */
-} /* dgbmv_ */
-/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer *
-       n, integer *k, doublereal *alpha, doublereal *a, integer *lda, 
-       doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, 
-       integer *ldc, ftnlen transa_len, ftnlen transb_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-    /* Local variables */
-    static integer i__, j, l, info;
-    static logical nota, notb;
-    static doublereal temp;
-    static integer ncola;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa, nrowb;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DGEMM  performs one of the matrix-matrix operations */
-/*     C := alpha*op( A )*op( B ) + beta*C, */
-/*  where  op( X ) is one of */
-/*     op( X ) = X   or   op( X ) = X', */
-/*  alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
-/*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n',  op( A ) = A. */
-/*              TRANSA = 'T' or 't',  op( A ) = A'. */
-/*              TRANSA = 'C' or 'c',  op( A ) = A'. */
-/*           Unchanged on exit. */
-/*           On entry, TRANSB specifies the form of op( B ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSB = 'N' or 'n',  op( B ) = B. */
-/*              TRANSB = 'T' or 't',  op( B ) = B'. */
-/*              TRANSB = 'C' or 'c',  op( B ) = B'. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies  the number  of rows  of the  matrix */
-/*           op( A )  and of the  matrix  C.  M  must  be at least  zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N  specifies the number  of columns of the matrix */
-/*           op( B ) and the number of columns of the matrix C. N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry,  K  specifies  the number of columns of the matrix */
-/*           op( A ) and the number of rows of the matrix op( B ). K must */
-/*           be at least  zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. */
-/*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by m  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */
-/*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. */
-/*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  n by k  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then */
-/*           LDB must be at least  max( 1, k ), otherwise  LDB must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n  matrix */
-/*           ( alpha*op( A )*op( B ) + beta*C ). */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not */
-/*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows */
-/*     and  columns of  A  and the  number of  rows  of  B  respectively. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    nota = lsame_(transa, "N", (ftnlen)1, (ftnlen)1);
-    notb = lsame_(transb, "N", (ftnlen)1, (ftnlen)1);
-    if (nota) {
-       nrowa = *m;
-       ncola = *k;
-    } else {
-       nrowa = *k;
-       ncola = *m;
-    }
-    if (notb) {
-       nrowb = *k;
-    } else {
-       nrowb = *n;
-    }
-/*     Test the input parameters. */
-    info = 0;
-    if (! nota && ! lsame_(transa, "C", (ftnlen)1, (ftnlen)1) && ! lsame_(
-           transa, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! notb && ! lsame_(transb, "C", (ftnlen)1, (ftnlen)1) && ! 
-           lsame_(transb, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < max(1,nrowa)) {
-       info = 8;
-    } else if (*ldb < max(1,nrowb)) {
-       info = 10;
-    } else if (*ldc < max(1,*m)) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("DGEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
-       return 0;
-    }
-/*     And if */
-    if (*alpha == 0.) {
-       if (*beta == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (notb) {
-       if (nota) {
-/*           Form  C := alpha*A*B + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L50: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L60: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (b[l + j * b_dim1] != 0.) {
-                       temp = *alpha * b[l + j * b_dim1];
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L70: */
-                       }
-                   }
-/* L80: */
-               }
-/* L90: */
-           }
-       } else {
-/*           Form  C := alpha*A'*B + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-/* L100: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L110: */
-               }
-/* L120: */
-           }
-       }
-    } else {
-       if (nota) {
-/*           Form  C := alpha*A*B' + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L130: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L140: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (b[j + l * b_dim1] != 0.) {
-                       temp = *alpha * b[j + l * b_dim1];
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L150: */
-                       }
-                   }
-/* L160: */
-               }
-/* L170: */
-           }
-       } else {
-/*           Form  C := alpha*A'*B' + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
-/* L180: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L190: */
-               }
-/* L200: */
-           }
-       }
-    }
-    return 0;
-/*     End of DGEMM . */
-} /* dgemm_ */
-/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal *
-       alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, 
-       doublereal *beta, doublereal *y, integer *incy, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DGEMV  performs one of the matrix-vector operations */
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry with BETA non-zero, the incremented array Y */
-/*           must contain the vector y. On exit, Y is overwritten by the */
-/*           updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*lda < max(1,*m)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("DGEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       y[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   iy = ky;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       y[iy] += temp * a[i__ + j * a_dim1];
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L100: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp += a[i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-/* L110: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of DGEMV . */
-} /* dgemv_ */
-/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *y, integer *incy, 
-       doublereal *a, integer *lda)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static doublereal temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DGER   performs the rank 1 operation */
-/*     A := alpha*x*y' + A, */
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DGER  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || *alpha == 0.) {
-       return 0;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           if (y[jy] != 0.) {
-               temp = *alpha * y[jy];
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           if (y[jy] != 0.) {
-               temp = *alpha * y[jy];
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   a[i__ + j * a_dim1] += x[ix] * temp;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-    return 0;
-/*     End of DGER  . */
-} /* dger_ */
-doublereal dnrm2_(integer *n, doublereal *x, integer *incx)
-    /* System generated locals */
-    integer i__1, i__2;
-    doublereal ret_val, d__1;
-    /* Builtin functions */
-    double sqrt(doublereal);
-    /* Local variables */
-    static integer ix;
-    static doublereal ssq, norm, scale, absxi;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  DNRM2 returns the euclidean norm of a vector via the function */
-/*  name, so that */
-/*     DNRM2 := sqrt( x'*x ) */
-/*  -- This version written on 25-October-1982. */
-/*     Modified on 14-October-1993 to inline the call to DLASSQ. */
-/*     Sven Hammarling, Nag Ltd. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-    /* Parameter adjustments */
-    --x;
-    /* Function Body */
-    if (*n < 1 || *incx < 1) {
-       norm = 0.;
-    } else if (*n == 1) {
-       norm = abs(x[1]);
-    } else {
-       scale = 0.;
-       ssq = 1.;
-/*        The following loop is equivalent to this call to the LAPACK */
-/*        auxiliary routine: */
-/*        CALL DLASSQ( N, X, INCX, SCALE, SSQ ) */
-       i__1 = (*n - 1) * *incx + 1;
-       i__2 = *incx;
-       for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
-           if (x[ix] != 0.) {
-               absxi = (d__1 = x[ix], abs(d__1));
-               if (scale < absxi) {
-/* Computing 2nd power */
-                   d__1 = scale / absxi;
-                   ssq = ssq * (d__1 * d__1) + 1.;
-                   scale = absxi;
-               } else {
-/* Computing 2nd power */
-                   d__1 = absxi / scale;
-                   ssq += d__1 * d__1;
-               }
-           }
-/* L10: */
-       }
-       norm = scale * sqrt(ssq);
-    }
-    ret_val = norm;
-    return ret_val;
-/*     End of DNRM2. */
-} /* dnrm2_ */
-/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, 
-       doublereal *dy, integer *incy, doublereal *c__, doublereal *s)
-    /* System generated locals */
-    integer i__1;
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublereal dtemp;
-/*     applies a plane rotation. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp = *c__ * dx[ix] + *s * dy[iy];
-       dy[iy] = *c__ * dy[iy] - *s * dx[ix];
-       dx[ix] = dtemp;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*       code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp = *c__ * dx[i__] + *s * dy[i__];
-       dy[i__] = *c__ * dy[i__] - *s * dx[i__];
-       dx[i__] = dtemp;
-/* L30: */
-    }
-    return 0;
-} /* drot_ */
-/* Subroutine */ int drotg_(doublereal *da, doublereal *db, doublereal *c__, 
-       doublereal *s)
-    /* System generated locals */
-    doublereal d__1, d__2;
-    /* Builtin functions */
-    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
-    /* Local variables */
-    static doublereal r__, z__, roe, scale;
-/*     construct givens plane rotation. */
-/*     jack dongarra, linpack, 3/11/78. */
-    roe = *db;
-    if (abs(*da) > abs(*db)) {
-       roe = *da;
-    }
-    scale = abs(*da) + abs(*db);
-    if (scale != 0.) {
-       goto L10;
-    }
-    *c__ = 1.;
-    *s = 0.;
-    r__ = 0.;
-    z__ = 0.;
-    goto L20;
-/* Computing 2nd power */
-    d__1 = *da / scale;
-/* Computing 2nd power */
-    d__2 = *db / scale;
-    r__ = scale * sqrt(d__1 * d__1 + d__2 * d__2);
-    r__ = d_sign(&c_b876, &roe) * r__;
-    *c__ = *da / r__;
-    *s = *db / r__;
-    z__ = 1.;
-    if (abs(*da) > abs(*db)) {
-       z__ = *s;
-    }
-    if (abs(*db) >= abs(*da) && *c__ != 0.) {
-       z__ = 1. / *c__;
-    }
-    *da = r__;
-    *db = z__;
-    return 0;
-} /* drotg_ */
-/* Subroutine */ int drotm_(integer *n, doublereal *dx, integer *incx, 
-       doublereal *dy, integer *incy, doublereal *dparam)
-    /* Initialized data */
-    static doublereal zero = 0.;
-    static doublereal two = 2.;
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__;
-    static doublereal w, z__;
-    static integer kx, ky;
-    static doublereal dh11, dh12, dh22, dh21, dflag;
-    static integer nsteps;
-/*     (DY**T) */
-/*     DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
-/*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
-/*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
-    /* Parameter adjustments */
-    --dparam;
-    --dy;
-    --dx;
-    /* Function Body */
-    dflag = dparam[1];
-    if (*n <= 0 || dflag + two == zero) {
-       goto L140;
-    }
-    if (! (*incx == *incy && *incx > 0)) {
-       goto L70;
-    }
-    nsteps = *n * *incx;
-    if (dflag < 0.) {
-       goto L50;
-    } else if (dflag == 0) {
-       goto L10;
-    } else {
-       goto L30;
-    }
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       w = dx[i__];
-       z__ = dy[i__];
-       dx[i__] = w + z__ * dh12;
-       dy[i__] = w * dh21 + z__;
-/* L20: */
-    }
-    goto L140;
-    dh11 = dparam[2];
-    dh22 = dparam[5];
-    i__2 = nsteps;
-    i__1 = *incx;
-    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
-       w = dx[i__];
-       z__ = dy[i__];
-       dx[i__] = w * dh11 + z__;
-       dy[i__] = -w + dh22 * z__;
-/* L40: */
-    }
-    goto L140;
-    dh11 = dparam[2];
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    dh22 = dparam[5];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       w = dx[i__];
-       z__ = dy[i__];
-       dx[i__] = w * dh11 + z__ * dh12;
-       dy[i__] = w * dh21 + z__ * dh22;
-/* L60: */
-    }
-    goto L140;
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-       kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-       ky = (1 - *n) * *incy + 1;
-    }
-    if (dflag < 0.) {
-       goto L120;
-    } else if (dflag == 0) {
-       goto L80;
-    } else {
-       goto L100;
-    }
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = dx[kx];
-       z__ = dy[ky];
-       dx[kx] = w + z__ * dh12;
-       dy[ky] = w * dh21 + z__;
-       kx += *incx;
-       ky += *incy;
-/* L90: */
-    }
-    goto L140;
-    dh11 = dparam[2];
-    dh22 = dparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = dx[kx];
-       z__ = dy[ky];
-       dx[kx] = w * dh11 + z__;
-       dy[ky] = -w + dh22 * z__;
-       kx += *incx;
-       ky += *incy;
-/* L110: */
-    }
-    goto L140;
-    dh11 = dparam[2];
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    dh22 = dparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = dx[kx];
-       z__ = dy[ky];
-       dx[kx] = w * dh11 + z__ * dh12;
-       dy[ky] = w * dh21 + z__ * dh22;
-       kx += *incx;
-       ky += *incy;
-/* L130: */
-    }
-    return 0;
-} /* drotm_ */
-/* Subroutine */ int drotmg_(doublereal *dd1, doublereal *dd2, doublereal *
-       dx1, doublereal *dy1, doublereal *dparam)
-    /* Initialized data */
-    static doublereal zero = 0.;
-    static doublereal one = 1.;
-    static doublereal two = 2.;
-    static doublereal gam = 4096.;
-    static doublereal gamsq = 16777216.;
-    static doublereal rgamsq = 5.9604645e-8;
-    /* Format strings */
-    static char fmt_120[] = "";
-    static char fmt_150[] = "";
-    static char fmt_180[] = "";
-    static char fmt_210[] = "";
-    /* System generated locals */
-    doublereal d__1;
-    /* Local variables */
-    static doublereal du, dp1, dp2, dq2, dq1, dh11, dh21, dh12, dh22;
-    static integer igo;
-    static doublereal dflag, dtemp;
-    /* Assigned format variables */
-    static char *igo_fmt;
-/*     DY2)**T. */
-/*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
-/*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
-    /* Parameter adjustments */
-    --dparam;
-    /* Function Body */
-    if (! (*dd1 < zero)) {
-       goto L10;
-    }
-/*       GO ZERO-H-D-AND-DX1.. */
-    goto L60;
-    dp2 = *dd2 * *dy1;
-    if (! (dp2 == zero)) {
-       goto L20;
-    }
-    dflag = -two;
-    goto L260;
-/*     REGULAR-CASE.. */
-    dp1 = *dd1 * *dx1;
-    dq2 = dp2 * *dy1;
-    dq1 = dp1 * *dx1;
-    if (! (abs(dq1) > abs(dq2))) {
-       goto L40;
-    }
-    dh21 = -(*dy1) / *dx1;
-    dh12 = dp2 / dp1;
-    du = one - dh12 * dh21;
-    if (! (du <= zero)) {
-       goto L30;
-    }
-/*         GO ZERO-H-D-AND-DX1.. */
-    goto L60;
-    dflag = zero;
-    *dd1 /= du;
-    *dd2 /= du;
-    *dx1 *= du;
-/*         GO SCALE-CHECK.. */
-    goto L100;
-    if (! (dq2 < zero)) {
-       goto L50;
-    }
-/*         GO ZERO-H-D-AND-DX1.. */
-    goto L60;
-    dflag = one;
-    dh11 = dp1 / dp2;
-    dh22 = *dx1 / *dy1;
-    du = one + dh11 * dh22;
-    dtemp = *dd2 / du;
-    *dd2 = *dd1 / du;
-    *dd1 = dtemp;
-    *dx1 = *dy1 * du;
-/*         GO SCALE-CHECK */
-    goto L100;
-/*     PROCEDURE..ZERO-H-D-AND-DX1.. */
-    dflag = -one;
-    dh11 = zero;
-    dh12 = zero;
-    dh21 = zero;
-    dh22 = zero;
-    *dd1 = zero;
-    *dd2 = zero;
-    *dx1 = zero;
-/*         RETURN.. */
-    goto L220;
-/*     PROCEDURE..FIX-H.. */
-    if (! (dflag >= zero)) {
-       goto L90;
-    }
-    if (! (dflag == zero)) {
-       goto L80;
-    }
-    dh11 = one;
-    dh22 = one;
-    dflag = -one;
-    goto L90;
-    dh21 = -one;
-    dh12 = one;
-    dflag = -one;
-    switch (igo) {
-       case 0: goto L120;
-       case 1: goto L150;
-       case 2: goto L180;
-       case 3: goto L210;
-    }
-    if (! (*dd1 <= rgamsq)) {
-       goto L130;
-    }
-    if (*dd1 == zero) {
-       goto L160;
-    }
-    igo = 0;
-    igo_fmt = fmt_120;
-/*              FIX-H.. */
-    goto L70;
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd1 *= d__1 * d__1;
-    *dx1 /= gam;
-    dh11 /= gam;
-    dh12 /= gam;
-    goto L110;
-    if (! (*dd1 >= gamsq)) {
-       goto L160;
-    }
-    igo = 1;
-    igo_fmt = fmt_150;
-/*              FIX-H.. */
-    goto L70;
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd1 /= d__1 * d__1;
-    *dx1 *= gam;
-    dh11 *= gam;
-    dh12 *= gam;
-    goto L140;
-    if (! (abs(*dd2) <= rgamsq)) {
-       goto L190;
-    }
-    if (*dd2 == zero) {
-       goto L220;
-    }
-    igo = 2;
-    igo_fmt = fmt_180;
-/*              FIX-H.. */
-    goto L70;
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd2 *= d__1 * d__1;
-    dh21 /= gam;
-    dh22 /= gam;
-    goto L170;
-    if (! (abs(*dd2) >= gamsq)) {
-       goto L220;
-    }
-    igo = 3;
-    igo_fmt = fmt_210;
-/*              FIX-H.. */
-    goto L70;
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd2 /= d__1 * d__1;
-    dh21 *= gam;
-    dh22 *= gam;
-    goto L200;
-    if (dflag < 0.) {
-       goto L250;
-    } else if (dflag == 0) {
-       goto L230;
-    } else {
-       goto L240;
-    }
-    dparam[3] = dh21;
-    dparam[4] = dh12;
-    goto L260;
-    dparam[2] = dh11;
-    dparam[5] = dh22;
-    goto L260;
-    dparam[2] = dh11;
-    dparam[3] = dh21;
-    dparam[4] = dh12;
-    dparam[5] = dh22;
-    dparam[1] = dflag;
-    return 0;
-} /* drotmg_ */
-/* Subroutine */ int dsbmv_(char *uplo, integer *n, integer *k, doublereal *
-       alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, 
-       doublereal *beta, doublereal *y, integer *incy, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-    /* Local variables */
-    static integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSBMV  performs the matrix-vector  operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric band matrix, with k super-diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  Y      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*k < 0) {
-       info = 3;
-    } else if (*lda < *k + 1) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("DSBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when upper triangle of A is stored. */
-       kplus1 = *k + 1;
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *k;
-               i__4 = j - 1;
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   y[i__] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               ix = kx;
-               iy = ky;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__4 = 1, i__2 = j - *k;
-               i__3 = j - 1;
-               for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                   y[iy] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * 
-                       temp2;
-               jx += *incx;
-               jy += *incy;
-               if (j > *k) {
-                   kx += *incx;
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when lower triangle of A is stored. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               y[j] += temp1 * a[j * a_dim1 + 1];
-               l = 1 - j;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   y[i__] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               y[jy] += temp1 * a[j * a_dim1 + 1];
-               l = 1 - j;
-               ix = jx;
-               iy = jy;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of DSBMV . */
-} /* dsbmv_ */
-/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, 
-       integer *incx)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, m, mp1, nincx;
-/*     scales a vector by a constant. */
-/*     uses unrolled loops for increment equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --dx;
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       dx[i__] = *da * dx[i__];
-/* L10: */
-    }
-    return 0;
-/*        code for increment equal to 1 */
-/*        clean-up loop */
-    m = *n % 5;
-    if (m == 0) {
-       goto L40;
-    }
-    i__2 = m;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       dx[i__] = *da * dx[i__];
-/* L30: */
-    }
-    if (*n < 5) {
-       return 0;
-    }
-    mp1 = m + 1;
-    i__2 = *n;
-    for (i__ = mp1; i__ <= i__2; i__ += 5) {
-       dx[i__] = *da * dx[i__];
-       dx[i__ + 1] = *da * dx[i__ + 1];
-       dx[i__ + 2] = *da * dx[i__ + 2];
-       dx[i__ + 3] = *da * dx[i__ + 3];
-       dx[i__ + 4] = *da * dx[i__ + 4];
-/* L50: */
-    }
-    return 0;
-} /* dscal_ */
-doublereal dsdot_(integer *n, real *sx, integer *incx, real *sy, integer *
-       incy)
-    /* System generated locals */
-    integer i__1, i__2;
-    doublereal ret_val;
-    /* Local variables */
-    static integer i__, ns, kx, ky;
-/* ***PURPOSE  Compute the inner product of two vectors with extended */
-/*            precision accumulation and result. */
-/* ***CATEGORY  D1A4 */
-/*             LINEAR ALGEBRA, VECTOR */
-/* ***AUTHOR  Lawson, C. L., (JPL) */
-/*           Hanson, R. J., (SNLA) */
-/*           Kincaid, D. R., (U. of Texas) */
-/*           Krogh, F. T., (JPL) */
-/*                B L A S  Subprogram */
-/*    Description of Parameters */
-/*     --Input-- */
-/*        N  number of elements in input vector(s) */
-/*       SX  single precision vector with N elements */
-/*     INCX  storage spacing between elements of SX */
-/*       SY  single precision vector with N elements */
-/*     INCY  storage spacing between elements of SY */
-/*     --Output-- */
-/*    DSDOT  double precision dot product (zero if N.LE.0) */
-/*     Returns D.P. dot product accumulated in D.P., for S.P. SX and SY */
-/*     DSDOT = sum for I = 0 to N-1 of  SX(LX+I*INCX) * SY(LY+I*INCY), */
-/*     where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is */
-/*     defined in a similar way using INCY. */
-/* ***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. */
-/*                 Krogh, Basic linear algebra subprograms for Fortran */
-/*                 usage, Algorithm No. 539, Transactions on Mathematical */
-/*                 Software 5, 3 (September 1979), pp. 308-323. */
-/*   791001  DATE WRITTEN */
-/*   890831  Modified array declarations.  (WRB) */
-/*   890831  REVISION DATE from Version 3.2 */
-/*   891214  Prologue converted to Version 4.0 format.  (BAB) */
-/*   920310  Corrected definition of LX in DESCRIPTION.  (WRB) */
-/*   920501  Reformatted the REFERENCES section.  (WRB) */
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-    /* Function Body */
-    ret_val = 0.;
-    if (*n <= 0) {
-       return ret_val;
-    }
-    if (*incx == *incy && *incx > 0) {
-       goto L20;
-    }
-/*     Code for unequal or nonpositive increments. */
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-       kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-       ky = (1 - *n) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       ret_val += (doublereal) sx[kx] * (doublereal) sy[ky];
-       kx += *incx;
-       ky += *incy;
-/* L10: */
-    }
-    return ret_val;
-/*     Code for equal, positive, non-unit increments. */
-    ns = *n * *incx;
-    i__1 = ns;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       ret_val += (doublereal) sx[i__] * (doublereal) sy[i__];
-/* L30: */
-    }
-    return ret_val;
-} /* dsdot_ */
-/* Subroutine */ int dspmv_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *ap, doublereal *x, integer *incx, doublereal *beta, 
-       doublereal *y, integer *incy, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSPMV  performs the matrix-vector operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 6;
-    } else if (*incy == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DSPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when AP contains the upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               k = kk;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * ap[k];
-                   temp2 += ap[k] * x[i__];
-                   ++k;
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-               kk += j;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               ix = kx;
-               iy = ky;
-               i__2 = kk + j - 2;
-               for (k = kk; k <= i__2; ++k) {
-                   y[iy] += temp1 * ap[k];
-                   temp2 += ap[k] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when AP contains the lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               y[j] += temp1 * ap[kk];
-               k = kk + 1;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * ap[k];
-                   temp2 += ap[k] * x[i__];
-                   ++k;
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-               kk += *n - j + 1;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               y[jy] += temp1 * ap[kk];
-               ix = jx;
-               iy = jy;
-               i__2 = kk + *n - j;
-               for (k = kk + 1; k <= i__2; ++k) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * ap[k];
-                   temp2 += ap[k] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-               kk += *n - j + 1;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of DSPMV . */
-} /* dspmv_ */
-/* Subroutine */ int dspr_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *ap, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSPR    performs the symmetric rank 1 operation */
-/*     A := alpha*x*x' + A, */
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n symmetric matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --ap;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    }
-    if (info != 0) {
-       xerbla_("DSPR  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-/*     Set the start point in X if the increment is not unity. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when upper triangle is stored in AP. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.) {
-                   temp = *alpha * x[j];
-                   k = kk;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       ap[k] += x[i__] * temp;
-                       ++k;
-/* L10: */
-                   }
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   ix = kx;
-                   i__2 = kk + j - 1;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] += x[ix] * temp;
-                       ix += *incx;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when lower triangle is stored in AP. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.) {
-                   temp = *alpha * x[j];
-                   k = kk;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       ap[k] += x[i__] * temp;
-                       ++k;
-/* L50: */
-                   }
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   ix = jx;
-                   i__2 = kk + *n - j;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] += x[ix] * temp;
-                       ix += *incx;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of DSPR  . */
-} /* dspr_ */
-/* Subroutine */ int dspr2_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *y, integer *incy, 
-       doublereal *ap, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSPR2  performs the symmetric rank 2 operation */
-/*     A := alpha*x*y' + alpha*y*x' + A, */
-/*  where alpha is a scalar, x and y are n element vectors and A is an */
-/*  n by n symmetric matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --ap;
-    --y;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("DSPR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when upper triangle is stored in AP. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0. || y[j] != 0.) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   k = kk;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2;
-                       ++k;
-/* L10: */
-                   }
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0. || y[jy] != 0.) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = kx;
-                   iy = ky;
-                   i__2 = kk + j - 1;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when lower triangle is stored in AP. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0. || y[j] != 0.) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   k = kk;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2;
-                       ++k;
-/* L50: */
-                   }
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0. || y[jy] != 0.) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = jx;
-                   iy = jy;
-                   i__2 = kk + *n - j;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of DSPR2 . */
-} /* dspr2_ */
-/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, 
-       doublereal *dy, integer *incy)
-    /* System generated locals */
-    integer i__1;
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-    static doublereal dtemp;
-/*     interchanges two vectors. */
-/*     uses unrolled loops for increments equal one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp = dx[ix];
-       dx[ix] = dy[iy];
-       dy[iy] = dtemp;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*       code for both increments equal to 1 */
-/*       clean-up loop */
-    m = *n % 3;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp = dx[i__];
-       dx[i__] = dy[i__];
-       dy[i__] = dtemp;
-/* L30: */
-    }
-    if (*n < 3) {
-       return 0;
-    }
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 3) {
-       dtemp = dx[i__];
-       dx[i__] = dy[i__];
-       dy[i__] = dtemp;
-       dtemp = dx[i__ + 1];
-       dx[i__ + 1] = dy[i__ + 1];
-       dy[i__ + 1] = dtemp;
-       dtemp = dx[i__ + 2];
-       dx[i__ + 2] = dy[i__ + 2];
-       dy[i__ + 2] = dtemp;
-/* L50: */
-    }
-    return 0;
-} /* dswap_ */
-/* Subroutine */ int dsymm_(char *side, char *uplo, integer *m, integer *n, 
-       doublereal *alpha, doublereal *a, integer *lda, doublereal *b, 
-       integer *ldb, doublereal *beta, doublereal *c__, integer *ldc, ftnlen 
-       side_len, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSYMM  performs one of the matrix-matrix operations */
-/*     C := alpha*A*B + beta*C, */
-/*  or */
-/*     C := alpha*B*A + beta*C, */
-/*  where alpha and beta are scalars,  A is a symmetric matrix and  B and */
-/*  C are  m by n matrices. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  symmetric matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  symmetric  matrix   A  is  to  be */
-/*           referenced as follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is  n otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set NROWA as the number of rows of A. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-/*     Test the input parameters. */
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("DSYMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.) {
-       if (*beta == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1 = *alpha * b[i__ + j * b_dim1];
-                   temp2 = 0.;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1];
-                       temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1];
-/* L50: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] 
-                               + *alpha * temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + temp1 * a[i__ + i__ * a_dim1] + *alpha * 
-                               temp2;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   temp1 = *alpha * b[i__ + j * b_dim1];
-                   temp2 = 0.;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1];
-                       temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1];
-/* L80: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] 
-                               + *alpha * temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + temp1 * a[i__ + i__ * a_dim1] + *alpha * 
-                               temp2;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*B*A + beta*C. */
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           temp1 = *alpha * a[j + j * a_dim1];
-           if (*beta == 0.) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = temp1 * b[i__ + j * b_dim1];
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + 
-                           temp1 * b[i__ + j * b_dim1];
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   temp1 = *alpha * a[k + j * a_dim1];
-               } else {
-                   temp1 = *alpha * a[j + k * a_dim1];
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1];
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   temp1 = *alpha * a[j + k * a_dim1];
-               } else {
-                   temp1 = *alpha * a[k + j * a_dim1];
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1];
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-    return 0;
-/*     End of DSYMM . */
-} /* dsymm_ */
-/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal 
-       *beta, doublereal *y, integer *incy, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSYMV  performs the matrix-vector  operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*lda < max(1,*n)) {
-       info = 5;
-    } else if (*incx == 0) {
-       info = 7;
-    } else if (*incy == 0) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("DSYMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when A is stored in upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[i__];
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               ix = kx;
-               iy = ky;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[iy] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when A is stored in lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               y[j] += temp1 * a[j + j * a_dim1];
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               y[jy] += temp1 * a[j + j * a_dim1];
-               ix = jx;
-               iy = jy;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of DSYMV . */
-} /* dsymv_ */
-/* Subroutine */ int dsyr_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *a, integer *lda, ftnlen 
-       uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSYR   performs the symmetric rank 1 operation */
-/*     A := alpha*x*x' + A, */
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n symmetric matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*lda < max(1,*n)) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("DSYR  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-/*     Set the start point in X if the increment is not unity. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when A is stored in upper triangle. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.) {
-                   temp = *alpha * x[j];
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-                   }
-               }
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   ix = kx;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[ix] * temp;
-                       ix += *incx;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when A is stored in lower triangle. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.) {
-                   temp = *alpha * x[j];
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[i__] * temp;
-/* L50: */
-                   }
-               }
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   ix = jx;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[ix] * temp;
-                       ix += *incx;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of DSYR  . */
-} /* dsyr_ */
-/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *y, integer *incy, 
-       doublereal *a, integer *lda, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSYR2  performs the symmetric rank 2 operation */
-/*     A := alpha*x*y' + alpha*y*x' + A, */
-/*  where alpha is a scalar, x and y are n element vectors and A is an n */
-/*  by n symmetric matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*n)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DSYR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when A is stored in the upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0. || y[j] != 0.) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * 
-                               temp1 + y[i__] * temp2;
-/* L10: */
-                   }
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0. || y[jy] != 0.) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = kx;
-                   iy = ky;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * 
-                               temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when A is stored in the lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0. || y[j] != 0.) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * 
-                               temp1 + y[i__] * temp2;
-/* L50: */
-                   }
-               }
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0. || y[jy] != 0.) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = jx;
-                   iy = jy;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * 
-                               temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of DSYR2 . */
-} /* dsyr2_ */
-/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, 
-       doublereal *alpha, doublereal *a, integer *lda, doublereal *b, 
-       integer *ldb, doublereal *beta, doublereal *c__, integer *ldc, ftnlen 
-       uplo_len, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSYR2K  performs one of the symmetric rank 2k operations */
-/*     C := alpha*A*B' + alpha*B*A' + beta*C, */
-/*  or */
-/*     C := alpha*A'*B + alpha*B'*A + beta*C, */
-/*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix */
-/*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n */
-/*  matrices in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' + */
-/*                                        beta*C. */
-/*              TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A + */
-/*                                        beta*C. */
-/*              TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A + */
-/*                                        beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number */
-/*           of rows of the matrices  A and B.  K must be at least  zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("DSYR2K", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
-       return 0;
-    }
-/*     And when  alpha.eq.zero. */
-    if (*alpha == 0.) {
-       if (upper) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L90: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) {
-                       temp1 = *alpha * b[j + l * b_dim1];
-                       temp2 = *alpha * a[j + l * a_dim1];
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
-                                   i__ + l * a_dim1] * temp1 + b[i__ + l * 
-                                   b_dim1] * temp2;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L140: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) {
-                       temp1 = *alpha * b[j + l * b_dim1];
-                       temp2 = *alpha * a[j + l * a_dim1];
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
-                                   i__ + l * a_dim1] * temp1 + b[i__ + l * 
-                                   b_dim1] * temp2;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1 = 0.;
-                   temp2 = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-                       temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
-/* L190: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * 
-                               temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + *alpha * temp1 + *alpha * temp2;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1 = 0.;
-                   temp2 = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-                       temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
-/* L220: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * 
-                               temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + *alpha * temp1 + *alpha * temp2;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of DSYR2K. */
-} /* dsyr2k_ */
-/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, 
-       doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, 
-       doublereal *c__, integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
-    /* Local variables (declared static, so their values persist between calls) */
-    static integer i__, j, l, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DSYRK  performs one of the symmetric rank k operations */
-/*     C := alpha*A*A' + beta*C, */
-/*  or */
-/*     C := alpha*A'*A + beta*C, */
-/*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix */
-/*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix */
-/*  in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */
-/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */
-/*              TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number */
-/*           of rows of the matrix  A.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - DOUBLE PRECISION.  On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - DOUBLE PRECISION.  On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments: offset a and c__ so 1-based a[i + j*a_dim1] indexing works */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n; /* A is n by k */
-    } else {
-       nrowa = *k; /* A is k by n */
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1; /* UPLO (argument 1) is neither U nor L */
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2; /* TRANS (argument 2) is none of N, T, C */
-    } else if (*n < 0) {
-       info = 3; /* N (argument 3) is negative */
-    } else if (*k < 0) {
-       info = 4; /* K (argument 4) is negative */
-    } else if (*lda < max(1,nrowa)) {
-       info = 7; /* LDA (argument 7) is smaller than max(1, rows of A) */
-    } else if (*ldc < max(1,*n)) {
-       info = 10; /* LDC (argument 10) is smaller than max(1, n) */
-    }
-    if (info != 0) {
-       xerbla_("DSYRK ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible: n == 0, or beta == 1 with alpha == 0 or k == 0. */
-    if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
-       return 0;
-    }
-/*     And when  alpha.eq.zero:  C := beta*C on the referenced triangle only. */
-    if (*alpha == 0.) {
-       if (upper) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*A' + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) { /* beta == 0: zero column j instead of scaling it */
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L90: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.) { /* skip the update when A(j,l) is zero */
-                       temp = *alpha * a[j + l * a_dim1];
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) { /* beta == 0: zero column j instead of scaling it */
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L140: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.) { /* skip the update when A(j,l) is zero */
-                       temp = *alpha * a[j + l * a_dim1];
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*A'*A + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
-/* L190: */
-                   }
-                   if (*beta == 0.) { /* beta == 0: the existing C entry is not read */
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
-/* L220: */
-                   }
-                   if (*beta == 0.) { /* beta == 0: the existing C entry is not read */
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of DSYRK . */
-} /* dsyrk_ */
-/* Subroutine */ int dtbmv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx,
-        ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DTBMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DTBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*         Form  x := A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[kplus1 + j * a_dim1];
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           x[ix] += temp * a[l + i__ + j * a_dim1];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[kplus1 + j * a_dim1];
-                       }
-                   }
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       l = 1 - j;
-/* Computing MIN */
-                       i__1 = *n, i__3 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                           x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j * a_dim1 + 1];
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__4 = *n, i__1 = j + *k;
-                       i__3 = j + 1;
-                       for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                           x[ix] += temp * a[l + i__ + j * a_dim1];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j * a_dim1 + 1];
-                       }
-                   }
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   l = kplus1 - j;
-                   if (nounit) {
-                       temp *= a[kplus1 + j * a_dim1];
-                   }
-/* Computing MAX */
-                   i__4 = 1, i__1 = j - *k;
-                   i__3 = max(i__4,i__1);
-                   for (i__ = j - 1; i__ >= i__3; --i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   kx -= *incx;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (nounit) {
-                       temp *= a[kplus1 + j * a_dim1];
-                   }
-/* Computing MAX */
-                   i__4 = 1, i__1 = j - *k;
-                   i__3 = max(i__4,i__1);
-                   for (i__ = j - 1; i__ >= i__3; --i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   temp = x[j];
-                   l = 1 - j;
-                   if (nounit) {
-                       temp *= a[j * a_dim1 + 1];
-                   }
-/* Computing MIN */
-                   i__1 = *n, i__2 = j + *k;
-                   i__4 = min(i__1,i__2);
-                   for (i__ = j + 1; i__ <= i__4; ++i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   temp = x[jx];
-                   kx += *incx;
-                   ix = kx;
-                   l = 1 - j;
-                   if (nounit) {
-                       temp *= a[j * a_dim1 + 1];
-                   }
-/* Computing MIN */
-                   i__1 = *n, i__2 = j + *k;
-                   i__4 = min(i__1,i__2);
-                   for (i__ = j + 1; i__ <= i__4; ++i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of DTBMV . */
-} /* dtbmv_ */
-/* Subroutine */ int dtbsv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx,
-        ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DTBSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular band matrix, with ( k + 1 ) */
-/*  diagonals. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DTBSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed by sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       l = kplus1 - j;
-                       if (nounit) {
-                           x[j] /= a[kplus1 + j * a_dim1];
-                       }
-                       temp = x[j];
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           x[i__] -= temp * a[l + i__ + j * a_dim1];
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   kx -= *incx;
-                   if (x[jx] != 0.) {
-                       ix = kx;
-                       l = kplus1 - j;
-                       if (nounit) {
-                           x[jx] /= a[kplus1 + j * a_dim1];
-                       }
-                       temp = x[jx];
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           x[ix] -= temp * a[l + i__ + j * a_dim1];
-                           ix -= *incx;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       l = 1 - j;
-                       if (nounit) {
-                           x[j] /= a[j * a_dim1 + 1];
-                       }
-                       temp = x[j];
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * a[l + i__ + j * a_dim1];
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   kx += *incx;
-                   if (x[jx] != 0.) {
-                       ix = kx;
-                       l = 1 - j;
-                       if (nounit) {
-                           x[jx] /= a[j * a_dim1 + 1];
-                       }
-                       temp = x[jx];
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[ix] -= temp * a[l + i__ + j * a_dim1];
-                           ix += *incx;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A')*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   l = kplus1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *k;
-                   i__4 = j - 1;
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= a[kplus1 + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   l = kplus1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *k;
-                   i__3 = j - 1;
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= a[kplus1 + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   l = 1 - j;
-/* Computing MIN */
-                   i__1 = *n, i__3 = j + *k;
-                   i__4 = j + 1;
-                   for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= a[j * a_dim1 + 1];
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   l = 1 - j;
-/* Computing MIN */
-                   i__4 = *n, i__1 = j + *k;
-                   i__3 = j + 1;
-                   for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= a[j * a_dim1 + 1];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of DTBSV . */
-} /* dtbsv_ */
-/* Subroutine */ int dtpmv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublereal *ap, doublereal *x, integer *incx, ftnlen uplo_len, ftnlen 
-       trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DTPMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("DTPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x:= A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       k = kk;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[i__] += temp * ap[k];
-                           ++k;
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= ap[kk + j - 1];
-                       }
-                   }
-                   kk += j;
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           x[ix] += temp * ap[k];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= ap[kk + j - 1];
-                       }
-                   }
-                   jx += *incx;
-                   kk += j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       k = kk;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[i__] += temp * ap[k];
-                           --k;
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= ap[kk - *n + j];
-                       }
-                   }
-                   kk -= *n - j + 1;
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           x[ix] += temp * ap[k];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= ap[kk - *n + j];
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   k = kk - 1;
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       temp += ap[k] * x[i__];
-                       --k;
-/* L90: */
-                   }
-                   x[j] = temp;
-                   kk -= j;
-/* L100: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   i__1 = kk - j + 1;
-                   for (k = kk - 1; k >= i__1; --k) {
-                       ix -= *incx;
-                       temp += ap[k] * x[ix];
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   kk -= j;
-/* L120: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       temp += ap[k] * x[i__];
-                       ++k;
-/* L130: */
-                   }
-                   x[j] = temp;
-                   kk += *n - j + 1;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       temp += ap[k] * x[ix];
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of DTPMV . */
-} /* dtpmv_ */
-/* Subroutine */ int dtpsv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublereal *ap, doublereal *x, integer *incx, ftnlen uplo_len, ftnlen 
-       trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DTPSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix, supplied in packed form. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("DTPSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       if (nounit) {
-                           x[j] /= ap[kk];
-                       }
-                       temp = x[j];
-                       k = kk - 1;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           x[i__] -= temp * ap[k];
-                           --k;
-/* L10: */
-                       }
-                   }
-                   kk -= j;
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       if (nounit) {
-                           x[jx] /= ap[kk];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           x[ix] -= temp * ap[k];
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       if (nounit) {
-                           x[j] /= ap[kk];
-                       }
-                       temp = x[j];
-                       k = kk + 1;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * ap[k];
-                           ++k;
-/* L50: */
-                       }
-                   }
-                   kk += *n - j + 1;
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       if (nounit) {
-                           x[jx] /= ap[kk];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           x[ix] -= temp * ap[k];
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= ap[k] * x[i__];
-                       ++k;
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk + j - 1];
-                   }
-                   x[j] = temp;
-                   kk += j;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       temp -= ap[k] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk + j - 1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   kk += j;
-/* L120: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   k = kk;
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= ap[k] * x[i__];
-                       --k;
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk - *n + j];
-                   }
-                   x[j] = temp;
-                   kk -= *n - j + 1;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__1 = kk - (*n - (j + 1));
-                   for (k = kk; k >= i__1; --k) {
-                       temp -= ap[k] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk - *n + j];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of DTPSV . */
-} /* dtpsv_ */
-/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
-       lda, doublereal *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, 
-       ftnlen transa_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublereal temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DTRMM  performs one of the matrix-matrix operations */
-/*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ), */
-/*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-/*     op( A ) = A   or   op( A ) = A'. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE specifies whether  op( A ) multiplies B from */
-/*           the left or right as follows: */
-/*              SIDE = 'L' or 'l'   B := alpha*op( A )*B. */
-/*              SIDE = 'R' or 'r'   B := alpha*B*op( A ). */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain the matrix  B,  and  on exit  is overwritten  by the */
-/*           transformed matrix. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("DTRMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               b[i__ + j * b_dim1] = 0.;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*A*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (b[k + j * b_dim1] != 0.) {
-                           temp = *alpha * b[k + j * b_dim1];
-                           i__3 = k - 1;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * a[i__ + k * 
-                                       a_dim1];
-/* L30: */
-                           }
-                           if (nounit) {
-                               temp *= a[k + k * a_dim1];
-                           }
-                           b[k + j * b_dim1] = temp;
-                       }
-/* L40: */
-                   }
-/* L50: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (k = *m; k >= 1; --k) {
-                       if (b[k + j * b_dim1] != 0.) {
-                           temp = *alpha * b[k + j * b_dim1];
-                           b[k + j * b_dim1] = temp;
-                           if (nounit) {
-                               b[k + j * b_dim1] *= a[k + k * a_dim1];
-                           }
-                           i__2 = *m;
-                           for (i__ = k + 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * a[i__ + k * 
-                                       a_dim1];
-/* L60: */
-                           }
-                       }
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*A'*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       temp = b[i__ + j * b_dim1];
-                       if (nounit) {
-                           temp *= a[i__ + i__ * a_dim1];
-                       }
-                       i__2 = i__ - 1;
-                       for (k = 1; k <= i__2; ++k) {
-                           temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L90: */
-                       }
-                       b[i__ + j * b_dim1] = *alpha * temp;
-/* L100: */
-                   }
-/* L110: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp = b[i__ + j * b_dim1];
-                       if (nounit) {
-                           temp *= a[i__ + i__ * a_dim1];
-                       }
-                       i__3 = *m;
-                       for (k = i__ + 1; k <= i__3; ++k) {
-                           temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L120: */
-                       }
-                       b[i__ + j * b_dim1] = *alpha * temp;
-/* L130: */
-                   }
-/* L140: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*B*A. */
-           if (upper) {
-               for (j = *n; j >= 1; --j) {
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__1 = *m;
-                   for (i__ = 1; i__ <= i__1; ++i__) {
-                       b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L150: */
-                   }
-                   i__1 = j - 1;
-                   for (k = 1; k <= i__1; ++k) {
-                       if (a[k + j * a_dim1] != 0.) {
-                           temp = *alpha * a[k + j * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L160: */
-                           }
-                       }
-/* L170: */
-                   }
-/* L180: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L190: */
-                   }
-                   i__2 = *n;
-                   for (k = j + 1; k <= i__2; ++k) {
-                       if (a[k + j * a_dim1] != 0.) {
-                           temp = *alpha * a[k + j * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L200: */
-                           }
-                       }
-/* L210: */
-                   }
-/* L220: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*B*A'. */
-           if (upper) {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   i__2 = k - 1;
-                   for (j = 1; j <= i__2; ++j) {
-                       if (a[j + k * a_dim1] != 0.) {
-                           temp = *alpha * a[j + k * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L230: */
-                           }
-                       }
-/* L240: */
-                   }
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[k + k * a_dim1];
-                   }
-                   if (temp != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L250: */
-                       }
-                   }
-/* L260: */
-               }
-           } else {
-               for (k = *n; k >= 1; --k) {
-                   i__1 = *n;
-                   for (j = k + 1; j <= i__1; ++j) {
-                       if (a[j + k * a_dim1] != 0.) {
-                           temp = *alpha * a[j + k * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L270: */
-                           }
-                       }
-/* L280: */
-                   }
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[k + k * a_dim1];
-                   }
-                   if (temp != 1.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L290: */
-                       }
-                   }
-/* L300: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of DTRMM . */
-} /* dtrmm_ */
-/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublereal *a, integer *lda, doublereal *x, integer *incx, ftnlen 
-       uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DTRMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("DTRMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[i__] += temp * a[i__ + j * a_dim1];
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j + j * a_dim1];
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[ix] += temp * a[i__ + j * a_dim1];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j + j * a_dim1];
-                       }
-                   }
-                   jx += *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j + j * a_dim1];
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[ix] += temp * a[i__ + j * a_dim1];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j + j * a_dim1];
-                       }
-                   }
-                   jx -= *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       ix -= *incx;
-                       temp += a[i__ + j * a_dim1] * x[ix];
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       temp += a[i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       temp += a[i__ + j * a_dim1] * x[ix];
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of DTRMV . */
-} /* dtrmv_ */
-/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
-       lda, doublereal *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, 
-       ftnlen transa_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublereal temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DTRSM  solves one of the matrix equations */
-/*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B, */
-/*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-/*     op( A ) = A   or   op( A ) = A'. */
-/*  The matrix X is overwritten on B. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry, SIDE specifies whether op( A ) appears on the left */
-/*           or right of X as follows: */
-/*              SIDE = 'L' or 'l'   op( A )*X = alpha*B. */
-/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B. */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain  the  right-hand  side  matrix  B,  and  on exit  is */
-/*           overwritten by the solution matrix  X. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("DTRSM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               b[i__ + j * b_dim1] = 0.;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*inv( A )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L30: */
-                       }
-                   }
-                   for (k = *m; k >= 1; --k) {
-                       if (b[k + j * b_dim1] != 0.) {
-                           if (nounit) {
-                               b[k + j * b_dim1] /= a[k + k * a_dim1];
-                           }
-                           i__2 = k - 1;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
-                                       i__ + k * a_dim1];
-/* L40: */
-                           }
-                       }
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L70: */
-                       }
-                   }
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (b[k + j * b_dim1] != 0.) {
-                           if (nounit) {
-                               b[k + j * b_dim1] /= a[k + k * a_dim1];
-                           }
-                           i__3 = *m;
-                           for (i__ = k + 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
-                                       i__ + k * a_dim1];
-/* L80: */
-                           }
-                       }
-/* L90: */
-                   }
-/* L100: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*inv( A' )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp = *alpha * b[i__ + j * b_dim1];
-                       i__3 = i__ - 1;
-                       for (k = 1; k <= i__3; ++k) {
-                           temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L110: */
-                       }
-                       if (nounit) {
-                           temp /= a[i__ + i__ * a_dim1];
-                       }
-                       b[i__ + j * b_dim1] = temp;
-/* L120: */
-                   }
-/* L130: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       temp = *alpha * b[i__ + j * b_dim1];
-                       i__2 = *m;
-                       for (k = i__ + 1; k <= i__2; ++k) {
-                           temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L140: */
-                       }
-                       if (nounit) {
-                           temp /= a[i__ + i__ * a_dim1];
-                       }
-                       b[i__ + j * b_dim1] = temp;
-/* L150: */
-                   }
-/* L160: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*B*inv( A ). */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L170: */
-                       }
-                   }
-                   i__2 = j - 1;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (a[k + j * a_dim1] != 0.) {
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
-                                       i__ + k * b_dim1];
-/* L180: */
-                           }
-                       }
-/* L190: */
-                   }
-                   if (nounit) {
-                       temp = 1. / a[j + j * a_dim1];
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L200: */
-                       }
-                   }
-/* L210: */
-               }
-           } else {
-               for (j = *n; j >= 1; --j) {
-                   if (*alpha != 1.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L220: */
-                       }
-                   }
-                   i__1 = *n;
-                   for (k = j + 1; k <= i__1; ++k) {
-                       if (a[k + j * a_dim1] != 0.) {
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
-                                       i__ + k * b_dim1];
-/* L230: */
-                           }
-                       }
-/* L240: */
-                   }
-                   if (nounit) {
-                       temp = 1. / a[j + j * a_dim1];
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L250: */
-                       }
-                   }
-/* L260: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*B*inv( A' ). */
-           if (upper) {
-               for (k = *n; k >= 1; --k) {
-                   if (nounit) {
-                       temp = 1. / a[k + k * a_dim1];
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L270: */
-                       }
-                   }
-                   i__1 = k - 1;
-                   for (j = 1; j <= i__1; ++j) {
-                       if (a[j + k * a_dim1] != 0.) {
-                           temp = a[j + k * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= temp * b[i__ + k * 
-                                       b_dim1];
-/* L280: */
-                           }
-                       }
-/* L290: */
-                   }
-                   if (*alpha != 1.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
-                                   ;
-/* L300: */
-                       }
-                   }
-/* L310: */
-               }
-           } else {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   if (nounit) {
-                       temp = 1. / a[k + k * a_dim1];
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L320: */
-                       }
-                   }
-                   i__2 = *n;
-                   for (j = k + 1; j <= i__2; ++j) {
-                       if (a[j + k * a_dim1] != 0.) {
-                           temp = a[j + k * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= temp * b[i__ + k * 
-                                       b_dim1];
-/* L330: */
-                           }
-                       }
-/* L340: */
-                   }
-                   if (*alpha != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
-                                   ;
-/* L350: */
-                       }
-                   }
-/* L360: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of DTRSM . */
-} /* dtrsm_ */
-/* Subroutine */ int dtrsv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublereal *a, integer *lda, doublereal *x, integer *incx, ftnlen 
-       uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  DTRSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("DTRSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       if (nounit) {
-                           x[j] /= a[j + j * a_dim1];
-                       }
-                       temp = x[j];
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           x[i__] -= temp * a[i__ + j * a_dim1];
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       if (nounit) {
-                           x[jx] /= a[j + j * a_dim1];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           x[ix] -= temp * a[i__ + j * a_dim1];
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       if (nounit) {
-                           x[j] /= a[j + j * a_dim1];
-                       }
-                       temp = x[j];
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * a[i__ + j * a_dim1];
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       if (nounit) {
-                           x[jx] /= a[j + j * a_dim1];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           x[ix] -= temp * a[i__ + j * a_dim1];
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= a[i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= a[i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= a[i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of DTRSV . */
-} /* dtrsv_ */
-doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx)
-    /* System generated locals */
-    integer i__1;
-    doublereal ret_val;
-    /* Local variables */
-    static integer i__, ix;
-    static doublereal stemp;
-    extern doublereal dcabs1_(doublecomplex *);
-/*     takes the sum of the absolute values. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --zx;
-    /* Function Body */
-    ret_val = 0.;
-    stemp = 0.;
-    if (*n <= 0 || *incx <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    ix = 1;
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp += dcabs1_(&zx[ix]);
-       ix += *incx;
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-/*        code for increment equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp += dcabs1_(&zx[i__]);
-/* L30: */
-    }
-    ret_val = stemp;
-    return ret_val;
-} /* dzasum_ */
-doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    doublereal ret_val, d__1;
-    /* Builtin functions */
-    double d_imag(doublecomplex *), sqrt(doublereal);
-    /* Local variables */
-    static integer ix;
-    static doublereal ssq, temp, norm, scale;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  DZNRM2 returns the euclidean norm of a vector via the function */
-/*  name, so that */
-/*     DZNRM2 := sqrt( conjg( x' )*x ) */
-/*  -- This version written on 25-October-1982. */
-/*     Modified on 14-October-1993 to inline the call to ZLASSQ. */
-/*     Sven Hammarling, Nag Ltd. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-    /* Parameter adjustments */
-    --x;
-    /* Function Body */
-    if (*n < 1 || *incx < 1) {
-       norm = 0.;
-    } else {
-       scale = 0.;
-       ssq = 1.;
-/*        The following loop is equivalent to this call to the LAPACK */
-/*        auxiliary routine: */
-/*        CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) */
-       i__1 = (*n - 1) * *incx + 1;
-       i__2 = *incx;
-       for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
-           i__3 = ix;
-           if (x[i__3].r != 0.) {
-               i__3 = ix;
-               temp = (d__1 = x[i__3].r, abs(d__1));
-               if (scale < temp) {
-/* Computing 2nd power */
-                   d__1 = scale / temp;
-                   ssq = ssq * (d__1 * d__1) + 1.;
-                   scale = temp;
-               } else {
-/* Computing 2nd power */
-                   d__1 = temp / scale;
-                   ssq += d__1 * d__1;
-               }
-           }
-           if (d_imag(&x[ix]) != 0.) {
-               temp = (d__1 = d_imag(&x[ix]), abs(d__1));
-               if (scale < temp) {
-/* Computing 2nd power */
-                   d__1 = scale / temp;
-                   ssq = ssq * (d__1 * d__1) + 1.;
-                   scale = temp;
-               } else {
-/* Computing 2nd power */
-                   d__1 = temp / scale;
-                   ssq += d__1 * d__1;
-               }
-           }
-/* L10: */
-       }
-       norm = scale * sqrt(ssq);
-    }
-    ret_val = norm;
-    return ret_val;
-/*     End of DZNRM2. */
-} /* dznrm2_ */
-integer icamax_(integer *n, complex *cx, integer *incx)
-    /* System generated locals */
-    integer ret_val, i__1, i__2;
-    real r__1, r__2;
-    /* Builtin functions */
-    double r_imag(complex *);
-    /* Local variables */
-    static integer i__, ix;
-    static real smax;
-/*     finds the index of element having max. absolute value. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --cx;
-    /* Function Body */
-    ret_val = 0;
-    if (*n < 1 || *incx <= 0) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    ix = 1;
-    smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2));
-    ix += *incx;
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), dabs(
-               r__2)) <= smax) {
-           goto L5;
-       }
-       ret_val = i__;
-       i__2 = ix;
-       smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), 
-               dabs(r__2));
-       ix += *incx;
-/* L10: */
-    }
-    return ret_val;
-/*        code for increment equal to 1 */
-    smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2));
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), dabs(
-               r__2)) <= smax) {
-           goto L30;
-       }
-       ret_val = i__;
-       i__2 = i__;
-       smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), 
-               dabs(r__2));
-       ;
-    }
-    return ret_val;
-} /* icamax_ */
-integer idamax_(integer *n, doublereal *dx, integer *incx)
-    /* System generated locals */
-    integer ret_val, i__1;
-    doublereal d__1;
-    /* Local variables */
-    static integer i__, ix;
-    static doublereal dmax__;
-/*     finds the index of element having max. absolute value. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --dx;
-    /* Function Body */
-    ret_val = 0;
-    if (*n < 1 || *incx <= 0) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    ix = 1;
-    dmax__ = abs(dx[1]);
-    ix += *incx;
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if ((d__1 = dx[ix], abs(d__1)) <= dmax__) {
-           goto L5;
-       }
-       ret_val = i__;
-       dmax__ = (d__1 = dx[ix], abs(d__1));
-       ix += *incx;
-/* L10: */
-    }
-    return ret_val;
-/*        code for increment equal to 1 */
-    dmax__ = abs(dx[1]);
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if ((d__1 = dx[i__], abs(d__1)) <= dmax__) {
-           goto L30;
-       }
-       ret_val = i__;
-       dmax__ = (d__1 = dx[i__], abs(d__1));
-       ;
-    }
-    return ret_val;
-} /* idamax_ */
-integer isamax_(integer *n, real *sx, integer *incx)
-    /* System generated locals */
-    integer ret_val, i__1;
-    real r__1;
-    /* Local variables */
-    static integer i__, ix;
-    static real smax;
-/*     finds the index of element having max. absolute value. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --sx;
-    /* Function Body */
-    ret_val = 0;
-    if (*n < 1 || *incx <= 0) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    ix = 1;
-    smax = dabs(sx[1]);
-    ix += *incx;
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if ((r__1 = sx[ix], dabs(r__1)) <= smax) {
-           goto L5;
-       }
-       ret_val = i__;
-       smax = (r__1 = sx[ix], dabs(r__1));
-       ix += *incx;
-/* L10: */
-    }
-    return ret_val;
-/*        code for increment equal to 1 */
-    smax = dabs(sx[1]);
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if ((r__1 = sx[i__], dabs(r__1)) <= smax) {
-           goto L30;
-       }
-       ret_val = i__;
-       smax = (r__1 = sx[i__], dabs(r__1));
-       ;
-    }
-    return ret_val;
-} /* isamax_ */
-integer izamax_(integer *n, doublecomplex *zx, integer *incx)
-    /* System generated locals */
-    integer ret_val, i__1;
-    /* Local variables */
-    static integer i__, ix;
-    static doublereal smax;
-    extern doublereal dcabs1_(doublecomplex *);
-/*     finds the index of element having max. absolute value. */
-/*     jack dongarra, 1/15/85. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --zx;
-    /* Function Body */
-    ret_val = 0;
-    if (*n < 1 || *incx <= 0) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    ix = 1;
-    smax = dcabs1_(&zx[1]);
-    ix += *incx;
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if (dcabs1_(&zx[ix]) <= smax) {
-           goto L5;
-       }
-       ret_val = i__;
-       smax = dcabs1_(&zx[ix]);
-       ix += *incx;
-/* L10: */
-    }
-    return ret_val;
-/*        code for increment equal to 1 */
-    smax = dcabs1_(&zx[1]);
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if (dcabs1_(&zx[i__]) <= smax) {
-           goto L30;
-       }
-       ret_val = i__;
-       smax = dcabs1_(&zx[i__]);
-       ;
-    }
-    return ret_val;
-} /* izamax_ */
-doublereal sasum_(integer *n, real *sx, integer *incx)
-    /* System generated locals */
-    integer i__1, i__2;
-    real ret_val, r__1, r__2, r__3, r__4, r__5, r__6;
-    /* Local variables */
-    static integer i__, m, mp1, nincx;
-    static real stemp;
-/*     takes the sum of the absolute values. */
-/*     uses unrolled loops for increment equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --sx;
-    /* Function Body */
-    ret_val = 0.f;
-    stemp = 0.f;
-    if (*n <= 0 || *incx <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       stemp += (r__1 = sx[i__], dabs(r__1));
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-/*        code for increment equal to 1 */
-/*        clean-up loop */
-    m = *n % 6;
-    if (m == 0) {
-       goto L40;
-    }
-    i__2 = m;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       stemp += (r__1 = sx[i__], dabs(r__1));
-/* L30: */
-    }
-    if (*n < 6) {
-       goto L60;
-    }
-    mp1 = m + 1;
-    i__2 = *n;
-    for (i__ = mp1; i__ <= i__2; i__ += 6) {
-       stemp = stemp + (r__1 = sx[i__], dabs(r__1)) + (r__2 = sx[i__ + 1], 
-               dabs(r__2)) + (r__3 = sx[i__ + 2], dabs(r__3)) + (r__4 = sx[
-               i__ + 3], dabs(r__4)) + (r__5 = sx[i__ + 4], dabs(r__5)) + (
-               r__6 = sx[i__ + 5], dabs(r__6));
-/* L50: */
-    }
-    ret_val = stemp;
-    return ret_val;
-} /* sasum_ */
-/* Subroutine */ int saxpy_(integer *n, real *sa, real *sx, integer *incx, 
-       real *sy, integer *incy)
-    /* System generated locals */
-    integer i__1;
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-/*     constant times a vector plus a vector. */
-/*     uses unrolled loop for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*sa == 0.f) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       sy[iy] += *sa * sx[ix];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*        code for both increments equal to 1 */
-/*        clean-up loop */
-    m = *n % 4;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       sy[i__] += *sa * sx[i__];
-/* L30: */
-    }
-    if (*n < 4) {
-       return 0;
-    }
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 4) {
-       sy[i__] += *sa * sx[i__];
-       sy[i__ + 1] += *sa * sx[i__ + 1];
-       sy[i__ + 2] += *sa * sx[i__ + 2];
-       sy[i__ + 3] += *sa * sx[i__ + 3];
-/* L50: */
-    }
-    return 0;
-} /* saxpy_ */
-doublereal scasum_(integer *n, complex *cx, integer *incx)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    real ret_val, r__1, r__2;
-    /* Builtin functions */
-    double r_imag(complex *);
-    /* Local variables */
-    static integer i__, nincx;
-    static real stemp;
-/*     takes the sum of the absolute values of a complex vector and */
-/*     returns a single precision result. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --cx;
-    /* Function Body */
-    ret_val = 0.f;
-    stemp = 0.f;
-    if (*n <= 0 || *incx <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       i__3 = i__;
-       stemp = stemp + (r__1 = cx[i__3].r, dabs(r__1)) + (r__2 = r_imag(&cx[
-               i__]), dabs(r__2));
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-/*        code for increment equal to 1 */
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       i__1 = i__;
-       stemp = stemp + (r__1 = cx[i__1].r, dabs(r__1)) + (r__2 = r_imag(&cx[
-               i__]), dabs(r__2));
-/* L30: */
-    }
-    ret_val = stemp;
-    return ret_val;
-} /* scasum_ */
-doublereal scnrm2_(integer *n, complex *x, integer *incx)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    real ret_val, r__1;
-    /* Builtin functions */
-    double r_imag(complex *), sqrt(doublereal);
-    /* Local variables */
-    static integer ix;
-    static real ssq, temp, norm, scale;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  SCNRM2 returns the euclidean norm of a vector via the function */
-/*  name, so that */
-/*     SCNRM2 := sqrt( conjg( x' )*x ) */
-/*  -- This version written on 25-October-1982. */
-/*     Modified on 14-October-1993 to inline the call to CLASSQ. */
-/*     Sven Hammarling, Nag Ltd. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-    /* Parameter adjustments */
-    --x;
-    /* Function Body */
-    if (*n < 1 || *incx < 1) {
-       norm = 0.f;
-    } else {
-       scale = 0.f;
-       ssq = 1.f;
-/*        The following loop is equivalent to this call to the LAPACK */
-/*        auxiliary routine: */
-/*        CALL CLASSQ( N, X, INCX, SCALE, SSQ ) */
-       i__1 = (*n - 1) * *incx + 1;
-       i__2 = *incx;
-       for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
-           i__3 = ix;
-           if (x[i__3].r != 0.f) {
-               i__3 = ix;
-               temp = (r__1 = x[i__3].r, dabs(r__1));
-               if (scale < temp) {
-/* Computing 2nd power */
-                   r__1 = scale / temp;
-                   ssq = ssq * (r__1 * r__1) + 1.f;
-                   scale = temp;
-               } else {
-/* Computing 2nd power */
-                   r__1 = temp / scale;
-                   ssq += r__1 * r__1;
-               }
-           }
-           if (r_imag(&x[ix]) != 0.f) {
-               temp = (r__1 = r_imag(&x[ix]), dabs(r__1));
-               if (scale < temp) {
-/* Computing 2nd power */
-                   r__1 = scale / temp;
-                   ssq = ssq * (r__1 * r__1) + 1.f;
-                   scale = temp;
-               } else {
-/* Computing 2nd power */
-                   r__1 = temp / scale;
-                   ssq += r__1 * r__1;
-               }
-           }
-/* L10: */
-       }
-       norm = scale * sqrt(ssq);
-    }
-    ret_val = norm;
-    return ret_val;
-/*     End of SCNRM2. */
-} /* scnrm2_ */
-/* Subroutine */ int scopy_(integer *n, real *sx, integer *incx, real *sy, 
-       integer *incy)
-    /* System generated locals */
-    integer i__1;
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-/*     copies a vector, x, to a vector, y. */
-/*     uses unrolled loops for increments equal to 1. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       sy[iy] = sx[ix];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*        code for both increments equal to 1 */
-/*        clean-up loop */
-    m = *n % 7;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       sy[i__] = sx[i__];
-/* L30: */
-    }
-    if (*n < 7) {
-       return 0;
-    }
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 7) {
-       sy[i__] = sx[i__];
-       sy[i__ + 1] = sx[i__ + 1];
-       sy[i__ + 2] = sx[i__ + 2];
-       sy[i__ + 3] = sx[i__ + 3];
-       sy[i__ + 4] = sx[i__ + 4];
-       sy[i__ + 5] = sx[i__ + 5];
-       sy[i__ + 6] = sx[i__ + 6];
-/* L50: */
-    }
-    return 0;
-} /* scopy_ */
-doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy)
-    /* System generated locals */
-    integer i__1;
-    real ret_val;
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-    static real stemp;
-/*     forms the dot product of two vectors. */
-/*     uses unrolled loops for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-    /* Function Body */
-    stemp = 0.f;
-    ret_val = 0.f;
-    if (*n <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp += sx[ix] * sy[iy];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-/*        code for both increments equal to 1 */
-/*        clean-up loop */
-    m = *n % 5;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp += sx[i__] * sy[i__];
-/* L30: */
-    }
-    if (*n < 5) {
-       goto L60;
-    }
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 5) {
-       stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[
-               i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + 
-               4] * sy[i__ + 4];
-/* L50: */
-    }
-    ret_val = stemp;
-    return ret_val;
-} /* sdot_ */
-doublereal sdsdot_(integer *n, real *sb, real *sx, integer *incx, real *sy, 
-       integer *incy)
-    /* System generated locals */
-    integer i__1, i__2;
-    real ret_val;
-    /* Local variables */
-    static integer i__, ns, kx, ky;
-    static doublereal dsdot;
-/* ***PURPOSE  Compute the inner product of two vectors with extended */
-/*            precision accumulation. */
-/* ***CATEGORY  D1A4 */
-/* ***AUTHOR  Lawson, C. L., (JPL) */
-/*           Hanson, R. J., (SNLA) */
-/*           Kincaid, D. R., (U. of Texas) */
-/*           Krogh, F. T., (JPL) */
-/*                B L A S  Subprogram */
-/*    Description of Parameters */
-/*     --Input-- */
-/*        N  number of elements in input vector(s) */
-/*       SB  single precision scalar to be added to inner product */
-/*       SX  single precision vector with N elements */
-/*     INCX  storage spacing between elements of SX */
-/*       SY  single precision vector with N elements */
-/*     INCY  storage spacing between elements of SY */
-/*     --Output-- */
-/*   SDSDOT  single precision dot product (SB if N .LE. 0) */
-/*     Returns S.P. result with dot product accumulated in D.P. */
-/*     SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), */
-/*     where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is */
-/*     defined in a similar way using INCY. */
-/* ***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. */
-/*                 Krogh, Basic linear algebra subprograms for Fortran */
-/*                 usage, Algorithm No. 539, Transactions on Mathematical */
-/*                 Software 5, 3 (September 1979), pp. 308-323. */
-/*   791001  DATE WRITTEN */
-/*   890531  Changed all specific intrinsics to generic.  (WRB) */
-/*   890831  Modified array declarations.  (WRB) */
-/*   890831  REVISION DATE from Version 3.2 */
-/*   891214  Prologue converted to Version 4.0 format.  (BAB) */
-/*   920310  Corrected definition of LX in DESCRIPTION.  (WRB) */
-/*   920501  Reformatted the REFERENCES section.  (WRB) */
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-    /* Function Body */
-    dsdot = *sb;
-    if (*n <= 0) {
-       goto L30;
-    }
-    if (*incx == *incy && *incx > 0) {
-       goto L40;
-    }
-/*     Code for unequal or nonpositive increments. */
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-       kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-       ky = (1 - *n) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dsdot += (doublereal) sx[kx] * (doublereal) sy[ky];
-       kx += *incx;
-       ky += *incy;
-/* L10: */
-    }
-    ret_val = dsdot;
-    return ret_val;
-/*     Code for equal and positive increments. */
-    ns = *n * *incx;
-    i__1 = ns;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       dsdot += (doublereal) sx[i__] * (doublereal) sy[i__];
-/* L50: */
-    }
-    ret_val = dsdot;
-    return ret_val;
-} /* sdsdot_ */
-/* Subroutine */ int sgbmv_(char *trans, integer *m, integer *n, integer *kl, 
-       integer *ku, real *alpha, real *a, integer *lda, real *x, integer *
-       incx, real *beta, real *y, integer *incy, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    /* Local variables */
-    static integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info;
-    static real temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SGBMV  performs one of the matrix-vector operations */
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n band matrix, with kl sub-diagonals and ku super-diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  KL     - INTEGER. */
-/*           On entry, KL specifies the number of sub-diagonals of the */
-/*           matrix A. KL must satisfy  0 .le. KL. */
-/*           Unchanged on exit. */
-/*  KU     - INTEGER. */
-/*           On entry, KU specifies the number of super-diagonals of the */
-/*           matrix A. KU must satisfy  0 .le. KU. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading ( kl + ku + 1 ) by n part of the */
-/*           array A must contain the matrix of coefficients, supplied */
-/*           column by column, with the leading diagonal of the matrix in */
-/*           row ( ku + 1 ) of the array, the first super-diagonal */
-/*           starting at position 2 in row ku, the first sub-diagonal */
-/*           starting at position 1 in row ( ku + 2 ), and so on. */
-/*           Elements in the array A that do not correspond to elements */
-/*           in the band matrix (such as the top left ku by ku triangle) */
-/*           are not referenced. */
-/*           The following program segment will transfer a band matrix */
-/*           from conventional full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    K = KU + 1 - J */
-/*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */
-/*                       A( K + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( kl + ku + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*kl < 0) {
-       info = 4;
-    } else if (*ku < 0) {
-       info = 5;
-    } else if (*lda < *kl + *ku + 1) {
-       info = 8;
-    } else if (*incx == 0) {
-       info = 10;
-    } else if (*incy == 0) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("SGBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the band part of A. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    kup1 = *ku + 1;
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       y[i__] += temp * a[k + i__ + j * a_dim1];
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   iy = ky;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       y[iy] += temp * a[k + i__ + j * a_dim1];
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               if (j > *ku) {
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.f;
-               k = kup1 - j;
-/* Computing MAX */
-               i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-               i__5 = *m, i__6 = j + *kl;
-               i__2 = min(i__5,i__6);
-               for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                   temp += a[k + i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L100: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.f;
-               ix = kx;
-               k = kup1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-               i__5 = *m, i__6 = j + *kl;
-               i__4 = min(i__5,i__6);
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   temp += a[k + i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-/* L110: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-               if (j > *ku) {
-                   kx += *incx;
-               }
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of SGBMV . */
-} /* sgbmv_ */
-/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer *
-       n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *
-       ldb, real *beta, real *c__, integer *ldc, ftnlen transa_len, ftnlen 
-       transb_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-    /* Local variables */
-    static integer i__, j, l, info;
-    static logical nota, notb;
-    static real temp;
-    static integer ncola;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa, nrowb;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SGEMM  performs one of the matrix-matrix operations */
-/*     C := alpha*op( A )*op( B ) + beta*C, */
-/*  where  op( X ) is one of */
-/*     op( X ) = X   or   op( X ) = X', */
-/*  alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
-/*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n',  op( A ) = A. */
-/*              TRANSA = 'T' or 't',  op( A ) = A'. */
-/*              TRANSA = 'C' or 'c',  op( A ) = A'. */
-/*           Unchanged on exit. */
-/*           On entry, TRANSB specifies the form of op( B ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSB = 'N' or 'n',  op( B ) = B. */
-/*              TRANSB = 'T' or 't',  op( B ) = B'. */
-/*              TRANSB = 'C' or 'c',  op( B ) = B'. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies  the number  of rows  of the  matrix */
-/*           op( A )  and of the  matrix  C.  M  must  be at least  zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N  specifies the number  of columns of the matrix */
-/*           op( B ) and the number of columns of the matrix C. N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry,  K  specifies  the number of columns of the matrix */
-/*           op( A ) and the number of rows of the matrix op( B ). K must */
-/*           be at least  zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. */
-/*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by m  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is */
-/*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. */
-/*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  n by k  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then */
-/*           LDB must be at least  max( 1, k ), otherwise  LDB must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - REAL             array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n  matrix */
-/*           ( alpha*op( A )*op( B ) + beta*C ). */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not */
-/*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows */
-/*     and  columns of  A  and the  number of  rows  of  B  respectively. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    nota = lsame_(transa, "N", (ftnlen)1, (ftnlen)1);
-    notb = lsame_(transb, "N", (ftnlen)1, (ftnlen)1);
-    if (nota) {
-       nrowa = *m;
-       ncola = *k;
-    } else {
-       nrowa = *k;
-       ncola = *m;
-    }
-    if (notb) {
-       nrowb = *k;
-    } else {
-       nrowb = *n;
-    }
-/*     Test the input parameters. */
-    info = 0;
-    if (! nota && ! lsame_(transa, "C", (ftnlen)1, (ftnlen)1) && ! lsame_(
-           transa, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! notb && ! lsame_(transb, "C", (ftnlen)1, (ftnlen)1) && ! 
-           lsame_(transb, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < max(1,nrowa)) {
-       info = 8;
-    } else if (*ldb < max(1,nrowb)) {
-       info = 10;
-    } else if (*ldc < max(1,*m)) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("SGEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
-       return 0;
-    }
-/*     And if */
-    if (*alpha == 0.f) {
-       if (*beta == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (notb) {
-       if (nota) {
-/*           Form  C := alpha*A*B + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L50: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L60: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (b[l + j * b_dim1] != 0.f) {
-                       temp = *alpha * b[l + j * b_dim1];
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L70: */
-                       }
-                   }
-/* L80: */
-               }
-/* L90: */
-           }
-       } else {
-/*           Form  C := alpha*A'*B + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-/* L100: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L110: */
-               }
-/* L120: */
-           }
-       }
-    } else {
-       if (nota) {
-/*           Form  C := alpha*A*B' + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L130: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L140: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (b[j + l * b_dim1] != 0.f) {
-                       temp = *alpha * b[j + l * b_dim1];
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L150: */
-                       }
-                   }
-/* L160: */
-               }
-/* L170: */
-           }
-       } else {
-/*           Form  C := alpha*A'*B' + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
-/* L180: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L190: */
-               }
-/* L200: */
-           }
-       }
-    }
-    return 0;
-/*     End of SGEMM . */
-} /* sgemm_ */
-/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, 
-       real *a, integer *lda, real *x, integer *incx, real *beta, real *y, 
-       integer *incy, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static real temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SGEMV  performs one of the matrix-vector operations */
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry with BETA non-zero, the incremented array Y */
-/*           must contain the vector y. On exit, Y is overwritten by the */
-/*           updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*lda < max(1,*m)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("SGEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       y[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   iy = ky;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       y[iy] += temp * a[i__ + j * a_dim1];
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.f;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L100: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.f;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp += a[i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-/* L110: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of SGEMV . */
-} /* sgemv_ */
-/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, 
-       integer *incx, real *y, integer *incy, real *a, integer *lda)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static real temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SGER   performs the rank 1 operation */
-/*     A := alpha*x*y' + A, */
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("SGER  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           if (y[jy] != 0.f) {
-               temp = *alpha * y[jy];
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           if (y[jy] != 0.f) {
-               temp = *alpha * y[jy];
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   a[i__ + j * a_dim1] += x[ix] * temp;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-    return 0;
-/*     End of SGER  . */
-} /* sger_ */
-doublereal snrm2_(integer *n, real *x, integer *incx)
-    /* System generated locals */
-    integer i__1, i__2;
-    real ret_val, r__1;
-    /* Builtin functions */
-    double sqrt(doublereal);
-    /* Local variables */
-    static integer ix;
-    static real ssq, norm, scale, absxi;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  SNRM2 returns the euclidean norm of a vector via the function */
-/*  name, so that */
-/*     SNRM2 := sqrt( x'*x ) */
-/*  -- This version written on 25-October-1982. */
-/*     Modified on 14-October-1993 to inline the call to SLASSQ. */
-/*     Sven Hammarling, Nag Ltd. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-    /* Parameter adjustments */
-    --x;
-    /* Function Body */
-    if (*n < 1 || *incx < 1) {
-       norm = 0.f;
-    } else if (*n == 1) {
-       norm = dabs(x[1]);
-    } else {
-       scale = 0.f;
-       ssq = 1.f;
-/*        The following loop is equivalent to this call to the LAPACK */
-/*        auxiliary routine: */
-/*        CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */
-       i__1 = (*n - 1) * *incx + 1;
-       i__2 = *incx;
-       for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
-           if (x[ix] != 0.f) {
-               absxi = (r__1 = x[ix], dabs(r__1));
-               if (scale < absxi) {
-/* Computing 2nd power */
-                   r__1 = scale / absxi;
-                   ssq = ssq * (r__1 * r__1) + 1.f;
-                   scale = absxi;
-               } else {
-/* Computing 2nd power */
-                   r__1 = absxi / scale;
-                   ssq += r__1 * r__1;
-               }
-           }
-/* L10: */
-       }
-       norm = scale * sqrt(ssq);
-    }
-    ret_val = norm;
-    return ret_val;
-/*     End of SNRM2. */
-} /* snrm2_ */
-/* Subroutine */ int srot_(integer *n, real *sx, integer *incx, real *sy, 
-       integer *incy, real *c__, real *s)
-    /* System generated locals */
-    integer i__1;
-    /* Local variables */
-    static integer i__, ix, iy;
-    static real stemp;
-/*     applies a plane rotation. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp = *c__ * sx[ix] + *s * sy[iy];
-       sy[iy] = *c__ * sy[iy] - *s * sx[ix];
-       sx[ix] = stemp;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*       code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp = *c__ * sx[i__] + *s * sy[i__];
-       sy[i__] = *c__ * sy[i__] - *s * sx[i__];
-       sx[i__] = stemp;
-/* L30: */
-    }
-    return 0;
-} /* srot_ */
-double r_sign(real *a, real *b)
- {
- double x;
- x = (*a >= 0 ? *a : - *a);
- return( *b >= 0 ? x : -x);
- }
-/* Subroutine */ int srotg_(real *sa, real *sb, real *c__, real *s)
-    /* System generated locals */
-    real r__1, r__2;
-    /* Builtin functions */
-    double sqrt(doublereal), r_sign(real *, real *);
-    /* Local variables */
-    static real r__, z__, roe, scale;
-/*     construct givens plane rotation. */
-/*     jack dongarra, linpack, 3/11/78. */
-    roe = *sb;
-    if (dabs(*sa) > dabs(*sb)) {
-       roe = *sa;
-    }
-    scale = dabs(*sa) + dabs(*sb);
-    if (scale != 0.f) {
-       goto L10;
-    }
-    *c__ = 1.f;
-    *s = 0.f;
-    r__ = 0.f;
-    z__ = 0.f;
-    goto L20;
-/* Computing 2nd power */
-    r__1 = *sa / scale;
-/* Computing 2nd power */
-    r__2 = *sb / scale;
-    r__ = scale * sqrt(r__1 * r__1 + r__2 * r__2);
-    r__ = r_sign(&c_b1543, &roe) * r__;
-    *c__ = *sa / r__;
-    *s = *sb / r__;
-    z__ = 1.f;
-    if (dabs(*sa) > dabs(*sb)) {
-       z__ = *s;
-    }
-    if (dabs(*sb) >= dabs(*sa) && *c__ != 0.f) {
-       z__ = 1.f / *c__;
-    }
-    *sa = r__;
-    *sb = z__;
-    return 0;
-} /* srotg_ */
-/* Subroutine */ int srotm_(integer *n, real *sx, integer *incx, real *sy, 
-       integer *incy, real *sparam)
-    /* Initialized data */
-    static real zero = 0.f;
-    static real two = 2.f;
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__;
-    static real w, z__;
-    static integer kx, ky;
-    static real sh11, sh12, sh21, sh22, sflag;
-    static integer nsteps;
-/*     (DX**T) */
-/*     SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
-/*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
-/*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
-    /* Parameter adjustments */
-    --sparam;
-    --sy;
-    --sx;
-    /* Function Body */
-    sflag = sparam[1];
-    if (*n <= 0 || sflag + two == zero) {
-       goto L140;
-    }
-    if (! (*incx == *incy && *incx > 0)) {
-       goto L70;
-    }
-    nsteps = *n * *incx;
-    if (sflag < 0.f) {
-       goto L50;
-    } else if (sflag == 0) {
-       goto L10;
-    } else {
-       goto L30;
-    }
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       w = sx[i__];
-       z__ = sy[i__];
-       sx[i__] = w + z__ * sh12;
-       sy[i__] = w * sh21 + z__;
-/* L20: */
-    }
-    goto L140;
-    sh11 = sparam[2];
-    sh22 = sparam[5];
-    i__2 = nsteps;
-    i__1 = *incx;
-    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
-       w = sx[i__];
-       z__ = sy[i__];
-       sx[i__] = w * sh11 + z__;
-       sy[i__] = -w + sh22 * z__;
-/* L40: */
-    }
-    goto L140;
-    sh11 = sparam[2];
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    sh22 = sparam[5];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       w = sx[i__];
-       z__ = sy[i__];
-       sx[i__] = w * sh11 + z__ * sh12;
-       sy[i__] = w * sh21 + z__ * sh22;
-/* L60: */
-    }
-    goto L140;
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-       kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-       ky = (1 - *n) * *incy + 1;
-    }
-    if (sflag < 0.f) {
-       goto L120;
-    } else if (sflag == 0) {
-       goto L80;
-    } else {
-       goto L100;
-    }
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = sx[kx];
-       z__ = sy[ky];
-       sx[kx] = w + z__ * sh12;
-       sy[ky] = w * sh21 + z__;
-       kx += *incx;
-       ky += *incy;
-/* L90: */
-    }
-    goto L140;
-    sh11 = sparam[2];
-    sh22 = sparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = sx[kx];
-       z__ = sy[ky];
-       sx[kx] = w * sh11 + z__;
-       sy[ky] = -w + sh22 * z__;
-       kx += *incx;
-       ky += *incy;
-/* L110: */
-    }
-    goto L140;
-    sh11 = sparam[2];
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    sh22 = sparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = sx[kx];
-       z__ = sy[ky];
-       sx[kx] = w * sh11 + z__ * sh12;
-       sy[ky] = w * sh21 + z__ * sh22;
-       kx += *incx;
-       ky += *incy;
-/* L130: */
-    }
-    return 0;
-} /* srotm_ */
-/* Subroutine */ int srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real 
-       *sparam)
-    /* Initialized data */
-    static real zero = 0.f;
-    static real one = 1.f;
-    static real two = 2.f;
-    static real gam = 4096.f;
-    static real gamsq = 16777200.f;
-    static real rgamsq = 5.96046e-8f;
-    /* Format strings */
-    static char fmt_120[] = "";
-    static char fmt_150[] = "";
-    static char fmt_180[] = "";
-    static char fmt_210[] = "";
-    /* System generated locals */
-    real r__1;
-    /* Local variables */
-    static real su, sp1, sp2, sq2, sq1, sh11, sh21, sh12, sh22;
-    static integer igo;
-    static real sflag, stemp;
-    /* Assigned format variables */
-    static char *igo_fmt;
-/*     SY2)**T. */
-/*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
-/*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
-    /* Parameter adjustments */
-    --sparam;
-    /* Function Body */
-    if (! (*sd1 < zero)) {
-       goto L10;
-    }
-/*       GO ZERO-H-D-AND-SX1.. */
-    goto L60;
-    sp2 = *sd2 * *sy1;
-    if (! (sp2 == zero)) {
-       goto L20;
-    }
-    sflag = -two;
-    goto L260;
-/*     REGULAR-CASE.. */
-    sp1 = *sd1 * *sx1;
-    sq2 = sp2 * *sy1;
-    sq1 = sp1 * *sx1;
-    if (! (dabs(sq1) > dabs(sq2))) {
-       goto L40;
-    }
-    sh21 = -(*sy1) / *sx1;
-    sh12 = sp2 / sp1;
-    su = one - sh12 * sh21;
-    if (! (su <= zero)) {
-       goto L30;
-    }
-/*         GO ZERO-H-D-AND-SX1.. */
-    goto L60;
-    sflag = zero;
-    *sd1 /= su;
-    *sd2 /= su;
-    *sx1 *= su;
-/*         GO SCALE-CHECK.. */
-    goto L100;
-    if (! (sq2 < zero)) {
-       goto L50;
-    }
-/*         GO ZERO-H-D-AND-SX1.. */
-    goto L60;
-    sflag = one;
-    sh11 = sp1 / sp2;
-    sh22 = *sx1 / *sy1;
-    su = one + sh11 * sh22;
-    stemp = *sd2 / su;
-    *sd2 = *sd1 / su;
-    *sd1 = stemp;
-    *sx1 = *sy1 * su;
-/*         GO SCALE-CHECK */
-    goto L100;
-/*     PROCEDURE..ZERO-H-D-AND-SX1.. */
-    sflag = -one;
-    sh11 = zero;
-    sh12 = zero;
-    sh21 = zero;
-    sh22 = zero;
-    *sd1 = zero;
-    *sd2 = zero;
-    *sx1 = zero;
-/*         RETURN.. */
-    goto L220;
-/*     PROCEDURE..FIX-H.. */
-    if (! (sflag >= zero)) {
-       goto L90;
-    }
-    if (! (sflag == zero)) {
-       goto L80;
-    }
-    sh11 = one;
-    sh22 = one;
-    sflag = -one;
-    goto L90;
-    sh21 = -one;
-    sh12 = one;
-    sflag = -one;
-    switch (igo) {
-       case 0: goto L120;
-       case 1: goto L150;
-       case 2: goto L180;
-       case 3: goto L210;
-    }
-    if (! (*sd1 <= rgamsq)) {
-       goto L130;
-    }
-    if (*sd1 == zero) {
-       goto L160;
-    }
-    igo = 0;
-    igo_fmt = fmt_120;
-/*              FIX-H.. */
-    goto L70;
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd1 *= r__1 * r__1;
-    *sx1 /= gam;
-    sh11 /= gam;
-    sh12 /= gam;
-    goto L110;
-    if (! (*sd1 >= gamsq)) {
-       goto L160;
-    }
-    igo = 1;
-    igo_fmt = fmt_150;
-/*              FIX-H.. */
-    goto L70;
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd1 /= r__1 * r__1;
-    *sx1 *= gam;
-    sh11 *= gam;
-    sh12 *= gam;
-    goto L140;
-    if (! (dabs(*sd2) <= rgamsq)) {
-       goto L190;
-    }
-    if (*sd2 == zero) {
-       goto L220;
-    }
-    igo = 2;
-    igo_fmt = fmt_180;
-/*              FIX-H.. */
-    goto L70;
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd2 *= r__1 * r__1;
-    sh21 /= gam;
-    sh22 /= gam;
-    goto L170;
-    if (! (dabs(*sd2) >= gamsq)) {
-       goto L220;
-    }
-    igo = 3;
-    igo_fmt = fmt_210;
-/*              FIX-H.. */
-    goto L70;
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd2 /= r__1 * r__1;
-    sh21 *= gam;
-    sh22 *= gam;
-    goto L200;
-    if (sflag < 0.f) {
-       goto L250;
-    } else if (sflag == 0) {
-       goto L230;
-    } else {
-       goto L240;
-    }
-    sparam[3] = sh21;
-    sparam[4] = sh12;
-    goto L260;
-    sparam[2] = sh11;
-    sparam[5] = sh22;
-    goto L260;
-    sparam[2] = sh11;
-    sparam[3] = sh21;
-    sparam[4] = sh12;
-    sparam[5] = sh22;
-    sparam[1] = sflag;
-    return 0;
-} /* srotmg_ */
-/* Subroutine */ int ssbmv_(char *uplo, integer *n, integer *k, real *alpha, 
-       real *a, integer *lda, real *x, integer *incx, real *beta, real *y, 
-       integer *incy, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-    /* Local variables */
-    static integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSBMV  performs the matrix-vector  operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric band matrix, with k super-diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  Y      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*k < 0) {
-       info = 3;
-    } else if (*lda < *k + 1) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("SSBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when upper triangle of A is stored. */
-       kplus1 = *k + 1;
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *k;
-               i__4 = j - 1;
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   y[i__] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               ix = kx;
-               iy = ky;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__4 = 1, i__2 = j - *k;
-               i__3 = j - 1;
-               for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                   y[iy] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * 
-                       temp2;
-               jx += *incx;
-               jy += *incy;
-               if (j > *k) {
-                   kx += *incx;
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when lower triangle of A is stored. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               y[j] += temp1 * a[j * a_dim1 + 1];
-               l = 1 - j;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   y[i__] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               y[jy] += temp1 * a[j * a_dim1 + 1];
-               l = 1 - j;
-               ix = jx;
-               iy = jy;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of SSBMV . */
-} /* ssbmv_ */
-/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, m, mp1, nincx;
-/*     scales a vector by a constant. */
-/*     uses unrolled loops for increment equal to 1. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --sx;
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       sx[i__] = *sa * sx[i__];
-/* L10: */
-    }
-    return 0;
-/*        code for increment equal to 1 */
-/*        clean-up loop */
-    m = *n % 5;
-    if (m == 0) {
-       goto L40;
-    }
-    i__2 = m;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       sx[i__] = *sa * sx[i__];
-/* L30: */
-    }
-    if (*n < 5) {
-       return 0;
-    }
-    mp1 = m + 1;
-    i__2 = *n;
-    for (i__ = mp1; i__ <= i__2; i__ += 5) {
-       sx[i__] = *sa * sx[i__];
-       sx[i__ + 1] = *sa * sx[i__ + 1];
-       sx[i__ + 2] = *sa * sx[i__ + 2];
-       sx[i__ + 3] = *sa * sx[i__ + 3];
-       sx[i__ + 4] = *sa * sx[i__ + 4];
-/* L50: */
-    }
-    return 0;
-} /* sscal_ */
-/* Subroutine */ int sspmv_(char *uplo, integer *n, real *alpha, real *ap, 
-       real *x, integer *incx, real *beta, real *y, integer *incy, ftnlen 
-       uplo_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSPMV  performs the matrix-vector operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 6;
-    } else if (*incy == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("SSPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when AP contains the upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               k = kk;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * ap[k];
-                   temp2 += ap[k] * x[i__];
-                   ++k;
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-               kk += j;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               ix = kx;
-               iy = ky;
-               i__2 = kk + j - 2;
-               for (k = kk; k <= i__2; ++k) {
-                   y[iy] += temp1 * ap[k];
-                   temp2 += ap[k] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when AP contains the lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               y[j] += temp1 * ap[kk];
-               k = kk + 1;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * ap[k];
-                   temp2 += ap[k] * x[i__];
-                   ++k;
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-               kk += *n - j + 1;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               y[jy] += temp1 * ap[kk];
-               ix = jx;
-               iy = jy;
-               i__2 = kk + *n - j;
-               for (k = kk + 1; k <= i__2; ++k) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * ap[k];
-                   temp2 += ap[k] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-               kk += *n - j + 1;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of SSPMV . */
-} /* sspmv_ */
-/* Subroutine */ int sspr_(char *uplo, integer *n, real *alpha, real *x, 
-       integer *incx, real *ap, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSPR    performs the symmetric rank 1 operation */
-/*     A := alpha*x*x' + A, */
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n symmetric matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --ap;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    }
-    if (info != 0) {
-       xerbla_("SSPR  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-/*     Set the start point in X if the increment is not unity. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when upper triangle is stored in AP. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f) {
-                   temp = *alpha * x[j];
-                   k = kk;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       ap[k] += x[i__] * temp;
-                       ++k;
-/* L10: */
-                   }
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   ix = kx;
-                   i__2 = kk + j - 1;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] += x[ix] * temp;
-                       ix += *incx;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when lower triangle is stored in AP. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f) {
-                   temp = *alpha * x[j];
-                   k = kk;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       ap[k] += x[i__] * temp;
-                       ++k;
-/* L50: */
-                   }
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   ix = jx;
-                   i__2 = kk + *n - j;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] += x[ix] * temp;
-                       ix += *incx;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of SSPR  . */
-} /* sspr_ */
-/* Subroutine */ int sspr2_(char *uplo, integer *n, real *alpha, real *x, 
-       integer *incx, real *y, integer *incy, real *ap, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSPR2  performs the symmetric rank 2 operation */
-/*     A := alpha*x*y' + alpha*y*x' + A, */
-/*  where alpha is a scalar, x and y are n element vectors and A is an */
-/*  n by n symmetric matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --ap;
-    --y;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("SSPR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when upper triangle is stored in AP. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f || y[j] != 0.f) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   k = kk;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2;
-                       ++k;
-/* L10: */
-                   }
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f || y[jy] != 0.f) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = kx;
-                   iy = ky;
-                   i__2 = kk + j - 1;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when lower triangle is stored in AP. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f || y[j] != 0.f) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   k = kk;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2;
-                       ++k;
-/* L50: */
-                   }
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f || y[jy] != 0.f) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = jx;
-                   iy = jy;
-                   i__2 = kk + *n - j;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of SSPR2 . */
-} /* sspr2_ */
-/* Subroutine */ int sswap_(integer *n, real *sx, integer *incx, real *sy, 
-       integer *incy)
-    /* System generated locals */
-    integer i__1;
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-    static real stemp;
-/*     interchanges two vectors. */
-/*     uses unrolled loops for increments equal to 1. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp = sx[ix];
-       sx[ix] = sy[iy];
-       sy[iy] = stemp;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*       code for both increments equal to 1 */
-/*       clean-up loop */
-    m = *n % 3;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp = sx[i__];
-       sx[i__] = sy[i__];
-       sy[i__] = stemp;
-/* L30: */
-    }
-    if (*n < 3) {
-       return 0;
-    }
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 3) {
-       stemp = sx[i__];
-       sx[i__] = sy[i__];
-       sy[i__] = stemp;
-       stemp = sx[i__ + 1];
-       sx[i__ + 1] = sy[i__ + 1];
-       sy[i__ + 1] = stemp;
-       stemp = sx[i__ + 2];
-       sx[i__ + 2] = sy[i__ + 2];
-       sy[i__ + 2] = stemp;
-/* L50: */
-    }
-    return 0;
-} /* sswap_ */
-/* Subroutine */ int ssymm_(char *side, char *uplo, integer *m, integer *n, 
-       real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta,
-        real *c__, integer *ldc, ftnlen side_len, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-    /* Local variables */
-    static integer i__, j, k, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSYMM  performs one of the matrix-matrix operations */
-/*     C := alpha*A*B + beta*C, */
-/*  or */
-/*     C := alpha*B*A + beta*C, */
-/*  where alpha and beta are scalars,  A is a symmetric matrix and  B and */
-/*  C are  m by n matrices. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  symmetric matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  symmetric  matrix   A  is  to  be */
-/*           referenced as follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is  n otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - REAL             array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - REAL             array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set NROWA as the number of rows of A. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-/*     Test the input parameters. */
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("SSYMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.f) {
-       if (*beta == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1 = *alpha * b[i__ + j * b_dim1];
-                   temp2 = 0.f;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1];
-                       temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1];
-/* L50: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] 
-                               + *alpha * temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + temp1 * a[i__ + i__ * a_dim1] + *alpha * 
-                               temp2;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   temp1 = *alpha * b[i__ + j * b_dim1];
-                   temp2 = 0.f;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1];
-                       temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1];
-/* L80: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] 
-                               + *alpha * temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + temp1 * a[i__ + i__ * a_dim1] + *alpha * 
-                               temp2;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*B*A + beta*C. */
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           temp1 = *alpha * a[j + j * a_dim1];
-           if (*beta == 0.f) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = temp1 * b[i__ + j * b_dim1];
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + 
-                           temp1 * b[i__ + j * b_dim1];
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   temp1 = *alpha * a[k + j * a_dim1];
-               } else {
-                   temp1 = *alpha * a[j + k * a_dim1];
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1];
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   temp1 = *alpha * a[j + k * a_dim1];
-               } else {
-                   temp1 = *alpha * a[k + j * a_dim1];
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1];
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-    return 0;
-/*     End of SSYMM . */
-} /* ssymm_ */
-/* Subroutine */ int ssymv_(char *uplo, integer *n, real *alpha, real *a, 
-       integer *lda, real *x, integer *incx, real *beta, real *y, integer *
-       incy, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSYMV  performs the matrix-vector  operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*lda < max(1,*n)) {
-       info = 5;
-    } else if (*incx == 0) {
-       info = 7;
-    } else if (*incy == 0) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("SSYMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-/*     First form  y := beta*y. */
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when A is stored in upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[i__];
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               ix = kx;
-               iy = ky;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[iy] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when A is stored in lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               y[j] += temp1 * a[j + j * a_dim1];
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               y[jy] += temp1 * a[j + j * a_dim1];
-               ix = jx;
-               iy = jy;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of SSYMV . */
-} /* ssymv_ */
-/* Subroutine */ int ssyr_(char *uplo, integer *n, real *alpha, real *x, 
-       integer *incx, real *a, integer *lda, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSYR   performs the symmetric rank 1 operation */
-/*     A := alpha*x*x' + A, */
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n symmetric matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*lda < max(1,*n)) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("SSYR  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-/*     Set the start point in X if the increment is not unity. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when A is stored in upper triangle. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f) {
-                   temp = *alpha * x[j];
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-                   }
-               }
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   ix = kx;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[ix] * temp;
-                       ix += *incx;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when A is stored in lower triangle. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f) {
-                   temp = *alpha * x[j];
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[i__] * temp;
-/* L50: */
-                   }
-               }
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   ix = jx;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[ix] * temp;
-                       ix += *incx;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of SSYR  . */
-} /* ssyr_ */
-/* Subroutine */ int ssyr2_(char *uplo, integer *n, real *alpha, real *x, 
-       integer *incx, real *y, integer *incy, real *a, integer *lda, ftnlen 
-       uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSYR2  performs the symmetric rank 2 operation */
-/*     A := alpha*x*y' + alpha*y*x' + A, */
-/*  where alpha is a scalar, x and y are n element vectors and A is an n */
-/*  by n symmetric matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*n)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("SSYR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when A is stored in the upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f || y[j] != 0.f) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * 
-                               temp1 + y[i__] * temp2;
-/* L10: */
-                   }
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f || y[jy] != 0.f) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = kx;
-                   iy = ky;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * 
-                               temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when A is stored in the lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f || y[j] != 0.f) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * 
-                               temp1 + y[i__] * temp2;
-/* L50: */
-                   }
-               }
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f || y[jy] != 0.f) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = jx;
-                   iy = jy;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * 
-                               temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of SSYR2 . */
-} /* ssyr2_ */
-/* Subroutine */ int ssyr2k_(char *uplo, char *trans, integer *n, integer *k, 
-       real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta,
-        real *c__, integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-    /* Local variables */
-    static integer i__, j, l, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSYR2K  performs one of the symmetric rank 2k operations */
-/*     C := alpha*A*B' + alpha*B*A' + beta*C, */
-/*  or */
-/*     C := alpha*A'*B + alpha*B'*A + beta*C, */
-/*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix */
-/*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n */
-/*  matrices in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' + */
-/*                                        beta*C. */
-/*              TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A + */
-/*                                        beta*C. */
-/*              TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A + */
-/*                                        beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number */
-/*           of rows of the matrices  A and B.  K must be at least  zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - REAL             array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("SSYR2K", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.f) {
-       if (upper) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L90: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) 
-                           {
-                       temp1 = *alpha * b[j + l * b_dim1];
-                       temp2 = *alpha * a[j + l * a_dim1];
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
-                                   i__ + l * a_dim1] * temp1 + b[i__ + l * 
-                                   b_dim1] * temp2;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L140: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) 
-                           {
-                       temp1 = *alpha * b[j + l * b_dim1];
-                       temp2 = *alpha * a[j + l * a_dim1];
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
-                                   i__ + l * a_dim1] * temp1 + b[i__ + l * 
-                                   b_dim1] * temp2;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1 = 0.f;
-                   temp2 = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-                       temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
-/* L190: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * 
-                               temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + *alpha * temp1 + *alpha * temp2;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1 = 0.f;
-                   temp2 = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-                       temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
-/* L220: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * 
-                               temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + *alpha * temp1 + *alpha * temp2;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of SSYR2K. */
-} /* ssyr2k_ */
-/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k, 
-       real *alpha, real *a, integer *lda, real *beta, real *c__, integer *
-       ldc, ftnlen uplo_len, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
-    /* Local variables */
-    static integer i__, j, l, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  SSYRK  performs one of the symmetric rank k operations */
-/*     C := alpha*A*A' + beta*C, */
-/*  or */
-/*     C := alpha*A'*A + beta*C, */
-/*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix */
-/*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix */
-/*  in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */
-/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */
-/*              TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number */
-/*           of rows of the matrix  A.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - REAL             array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("SSYRK ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.f) {
-       if (upper) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*A' + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L90: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.f) {
-                       temp = *alpha * a[j + l * a_dim1];
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L140: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.f) {
-                       temp = *alpha * a[j + l * a_dim1];
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*A'*A + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
-/* L190: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
-/* L220: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of SSYRK . */
-} /* ssyrk_ */
-/* Subroutine */ int stbmv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, real *a, integer *lda, real *x, integer *incx, ftnlen 
-       uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  STBMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("STBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*         Form  x := A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[kplus1 + j * a_dim1];
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           x[ix] += temp * a[l + i__ + j * a_dim1];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[kplus1 + j * a_dim1];
-                       }
-                   }
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       l = 1 - j;
-/* Computing MIN */
-                       i__1 = *n, i__3 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                           x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j * a_dim1 + 1];
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__4 = *n, i__1 = j + *k;
-                       i__3 = j + 1;
-                       for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                           x[ix] += temp * a[l + i__ + j * a_dim1];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j * a_dim1 + 1];
-                       }
-                   }
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   l = kplus1 - j;
-                   if (nounit) {
-                       temp *= a[kplus1 + j * a_dim1];
-                   }
-/* Computing MAX */
-                   i__4 = 1, i__1 = j - *k;
-                   i__3 = max(i__4,i__1);
-                   for (i__ = j - 1; i__ >= i__3; --i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   kx -= *incx;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (nounit) {
-                       temp *= a[kplus1 + j * a_dim1];
-                   }
-/* Computing MAX */
-                   i__4 = 1, i__1 = j - *k;
-                   i__3 = max(i__4,i__1);
-                   for (i__ = j - 1; i__ >= i__3; --i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   temp = x[j];
-                   l = 1 - j;
-                   if (nounit) {
-                       temp *= a[j * a_dim1 + 1];
-                   }
-/* Computing MIN */
-                   i__1 = *n, i__2 = j + *k;
-                   i__4 = min(i__1,i__2);
-                   for (i__ = j + 1; i__ <= i__4; ++i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   temp = x[jx];
-                   kx += *incx;
-                   ix = kx;
-                   l = 1 - j;
-                   if (nounit) {
-                       temp *= a[j * a_dim1 + 1];
-                   }
-/* Computing MIN */
-                   i__1 = *n, i__2 = j + *k;
-                   i__4 = min(i__1,i__2);
-                   for (i__ = j + 1; i__ <= i__4; ++i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of STBMV . */
-} /* stbmv_ */
-/* Subroutine */ int stbsv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, real *a, integer *lda, real *x, integer *incx, ftnlen 
-       uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  STBSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular band matrix, with ( k + 1 ) */
-/*  diagonals. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("STBSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed by sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       l = kplus1 - j;
-                       if (nounit) {
-                           x[j] /= a[kplus1 + j * a_dim1];
-                       }
-                       temp = x[j];
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           x[i__] -= temp * a[l + i__ + j * a_dim1];
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   kx -= *incx;
-                   if (x[jx] != 0.f) {
-                       ix = kx;
-                       l = kplus1 - j;
-                       if (nounit) {
-                           x[jx] /= a[kplus1 + j * a_dim1];
-                       }
-                       temp = x[jx];
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           x[ix] -= temp * a[l + i__ + j * a_dim1];
-                           ix -= *incx;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       l = 1 - j;
-                       if (nounit) {
-                           x[j] /= a[j * a_dim1 + 1];
-                       }
-                       temp = x[j];
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * a[l + i__ + j * a_dim1];
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   kx += *incx;
-                   if (x[jx] != 0.f) {
-                       ix = kx;
-                       l = 1 - j;
-                       if (nounit) {
-                           x[jx] /= a[j * a_dim1 + 1];
-                       }
-                       temp = x[jx];
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[ix] -= temp * a[l + i__ + j * a_dim1];
-                           ix += *incx;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A')*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   l = kplus1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *k;
-                   i__4 = j - 1;
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= a[kplus1 + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   l = kplus1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *k;
-                   i__3 = j - 1;
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= a[kplus1 + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   l = 1 - j;
-/* Computing MIN */
-                   i__1 = *n, i__3 = j + *k;
-                   i__4 = j + 1;
-                   for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= a[j * a_dim1 + 1];
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   l = 1 - j;
-/* Computing MIN */
-                   i__4 = *n, i__1 = j + *k;
-                   i__3 = j + 1;
-                   for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= a[j * a_dim1 + 1];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of STBSV . */
-} /* stbsv_ */
-/* Subroutine */ int stpmv_(char *uplo, char *trans, char *diag, integer *n, 
-       real *ap, real *x, integer *incx, ftnlen uplo_len, ftnlen trans_len, 
-       ftnlen diag_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  STPMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("STPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x:= A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       k = kk;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[i__] += temp * ap[k];
-                           ++k;
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= ap[kk + j - 1];
-                       }
-                   }
-                   kk += j;
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           x[ix] += temp * ap[k];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= ap[kk + j - 1];
-                       }
-                   }
-                   jx += *incx;
-                   kk += j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       k = kk;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[i__] += temp * ap[k];
-                           --k;
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= ap[kk - *n + j];
-                       }
-                   }
-                   kk -= *n - j + 1;
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           x[ix] += temp * ap[k];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= ap[kk - *n + j];
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   k = kk - 1;
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       temp += ap[k] * x[i__];
-                       --k;
-/* L90: */
-                   }
-                   x[j] = temp;
-                   kk -= j;
-/* L100: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   i__1 = kk - j + 1;
-                   for (k = kk - 1; k >= i__1; --k) {
-                       ix -= *incx;
-                       temp += ap[k] * x[ix];
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   kk -= j;
-/* L120: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       temp += ap[k] * x[i__];
-                       ++k;
-/* L130: */
-                   }
-                   x[j] = temp;
-                   kk += *n - j + 1;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       temp += ap[k] * x[ix];
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of STPMV . */
-} /* stpmv_ */
-/* Subroutine */ int stpsv_(char *uplo, char *trans, char *diag, integer *n, 
-       real *ap, real *x, integer *incx, ftnlen uplo_len, ftnlen trans_len, 
-       ftnlen diag_len)
-    /* System generated locals */
-    integer i__1, i__2;
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  STPSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix, supplied in packed form. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("STPSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       if (nounit) {
-                           x[j] /= ap[kk];
-                       }
-                       temp = x[j];
-                       k = kk - 1;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           x[i__] -= temp * ap[k];
-                           --k;
-/* L10: */
-                       }
-                   }
-                   kk -= j;
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       if (nounit) {
-                           x[jx] /= ap[kk];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           x[ix] -= temp * ap[k];
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       if (nounit) {
-                           x[j] /= ap[kk];
-                       }
-                       temp = x[j];
-                       k = kk + 1;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * ap[k];
-                           ++k;
-/* L50: */
-                       }
-                   }
-                   kk += *n - j + 1;
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       if (nounit) {
-                           x[jx] /= ap[kk];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           x[ix] -= temp * ap[k];
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= ap[k] * x[i__];
-                       ++k;
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk + j - 1];
-                   }
-                   x[j] = temp;
-                   kk += j;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       temp -= ap[k] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk + j - 1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   kk += j;
-/* L120: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   k = kk;
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= ap[k] * x[i__];
-                       --k;
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk - *n + j];
-                   }
-                   x[j] = temp;
-                   kk -= *n - j + 1;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__1 = kk - (*n - (j + 1));
-                   for (k = kk; k >= i__1; --k) {
-                       temp -= ap[k] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk - *n + j];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of STPSV . */
-} /* stpsv_ */
-/* Subroutine */ int strmm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, 
-       integer *ldb, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, 
-       ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
-    /* Local variables */
-    static integer i__, j, k, info;
-    static real temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  STRMM  performs one of the matrix-matrix operations */
-/*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ), */
-/*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-/*     op( A ) = A   or   op( A ) = A'. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE specifies whether  op( A ) multiplies B from */
-/*           the left or right as follows: */
-/*              SIDE = 'L' or 'l'   B := alpha*op( A )*B. */
-/*              SIDE = 'R' or 'r'   B := alpha*B*op( A ). */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - REAL             array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain the matrix  B,  and  on exit  is overwritten  by the */
-/*           transformed matrix. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("STRMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.f) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               b[i__ + j * b_dim1] = 0.f;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*A*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (b[k + j * b_dim1] != 0.f) {
-                           temp = *alpha * b[k + j * b_dim1];
-                           i__3 = k - 1;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * a[i__ + k * 
-                                       a_dim1];
-/* L30: */
-                           }
-                           if (nounit) {
-                               temp *= a[k + k * a_dim1];
-                           }
-                           b[k + j * b_dim1] = temp;
-                       }
-/* L40: */
-                   }
-/* L50: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (k = *m; k >= 1; --k) {
-                       if (b[k + j * b_dim1] != 0.f) {
-                           temp = *alpha * b[k + j * b_dim1];
-                           b[k + j * b_dim1] = temp;
-                           if (nounit) {
-                               b[k + j * b_dim1] *= a[k + k * a_dim1];
-                           }
-                           i__2 = *m;
-                           for (i__ = k + 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * a[i__ + k * 
-                                       a_dim1];
-/* L60: */
-                           }
-                       }
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*A'*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       temp = b[i__ + j * b_dim1];
-                       if (nounit) {
-                           temp *= a[i__ + i__ * a_dim1];
-                       }
-                       i__2 = i__ - 1;
-                       for (k = 1; k <= i__2; ++k) {
-                           temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L90: */
-                       }
-                       b[i__ + j * b_dim1] = *alpha * temp;
-/* L100: */
-                   }
-/* L110: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp = b[i__ + j * b_dim1];
-                       if (nounit) {
-                           temp *= a[i__ + i__ * a_dim1];
-                       }
-                       i__3 = *m;
-                       for (k = i__ + 1; k <= i__3; ++k) {
-                           temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L120: */
-                       }
-                       b[i__ + j * b_dim1] = *alpha * temp;
-/* L130: */
-                   }
-/* L140: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*B*A. */
-           if (upper) {
-               for (j = *n; j >= 1; --j) {
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__1 = *m;
-                   for (i__ = 1; i__ <= i__1; ++i__) {
-                       b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L150: */
-                   }
-                   i__1 = j - 1;
-                   for (k = 1; k <= i__1; ++k) {
-                       if (a[k + j * a_dim1] != 0.f) {
-                           temp = *alpha * a[k + j * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L160: */
-                           }
-                       }
-/* L170: */
-                   }
-/* L180: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L190: */
-                   }
-                   i__2 = *n;
-                   for (k = j + 1; k <= i__2; ++k) {
-                       if (a[k + j * a_dim1] != 0.f) {
-                           temp = *alpha * a[k + j * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L200: */
-                           }
-                       }
-/* L210: */
-                   }
-/* L220: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*B*A'. */
-           if (upper) {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   i__2 = k - 1;
-                   for (j = 1; j <= i__2; ++j) {
-                       if (a[j + k * a_dim1] != 0.f) {
-                           temp = *alpha * a[j + k * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L230: */
-                           }
-                       }
-/* L240: */
-                   }
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[k + k * a_dim1];
-                   }
-                   if (temp != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L250: */
-                       }
-                   }
-/* L260: */
-               }
-           } else {
-               for (k = *n; k >= 1; --k) {
-                   i__1 = *n;
-                   for (j = k + 1; j <= i__1; ++j) {
-                       if (a[j + k * a_dim1] != 0.f) {
-                           temp = *alpha * a[j + k * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L270: */
-                           }
-                       }
-/* L280: */
-                   }
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[k + k * a_dim1];
-                   }
-                   if (temp != 1.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L290: */
-                       }
-                   }
-/* L300: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of STRMM . */
-} /* strmm_ */
-/* Subroutine */ int strmv_(char *uplo, char *trans, char *diag, integer *n, 
-       real *a, integer *lda, real *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  STRMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("STRMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[i__] += temp * a[i__ + j * a_dim1];
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j + j * a_dim1];
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[ix] += temp * a[i__ + j * a_dim1];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j + j * a_dim1];
-                       }
-                   }
-                   jx += *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j + j * a_dim1];
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[ix] += temp * a[i__ + j * a_dim1];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j + j * a_dim1];
-                       }
-                   }
-                   jx -= *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       ix -= *incx;
-                       temp += a[i__ + j * a_dim1] * x[ix];
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       temp += a[i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       temp += a[i__ + j * a_dim1] * x[ix];
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of STRMV . */
-} /* strmv_ */
-/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, 
-       integer *ldb, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, 
-       ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
-    /* Local variables */
-    static integer i__, j, k, info;
-    static real temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  STRSM  solves one of the matrix equations */
-/*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B, */
-/*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-/*     op( A ) = A   or   op( A ) = A'. */
-/*  The matrix X is overwritten on B. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry, SIDE specifies whether op( A ) appears on the left */
-/*           or right of X as follows: */
-/*              SIDE = 'L' or 'l'   op( A )*X = alpha*B. */
-/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B. */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - REAL            . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - REAL             array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain  the  right-hand  side  matrix  B,  and  on exit  is */
-/*           overwritten by the solution matrix  X. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("STRSM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.f) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               b[i__ + j * b_dim1] = 0.f;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*inv( A )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L30: */
-                       }
-                   }
-                   for (k = *m; k >= 1; --k) {
-                       if (b[k + j * b_dim1] != 0.f) {
-                           if (nounit) {
-                               b[k + j * b_dim1] /= a[k + k * a_dim1];
-                           }
-                           i__2 = k - 1;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
-                                       i__ + k * a_dim1];
-/* L40: */
-                           }
-                       }
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L70: */
-                       }
-                   }
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (b[k + j * b_dim1] != 0.f) {
-                           if (nounit) {
-                               b[k + j * b_dim1] /= a[k + k * a_dim1];
-                           }
-                           i__3 = *m;
-                           for (i__ = k + 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
-                                       i__ + k * a_dim1];
-/* L80: */
-                           }
-                       }
-/* L90: */
-                   }
-/* L100: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*inv( A' )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp = *alpha * b[i__ + j * b_dim1];
-                       i__3 = i__ - 1;
-                       for (k = 1; k <= i__3; ++k) {
-                           temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L110: */
-                       }
-                       if (nounit) {
-                           temp /= a[i__ + i__ * a_dim1];
-                       }
-                       b[i__ + j * b_dim1] = temp;
-/* L120: */
-                   }
-/* L130: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       temp = *alpha * b[i__ + j * b_dim1];
-                       i__2 = *m;
-                       for (k = i__ + 1; k <= i__2; ++k) {
-                           temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L140: */
-                       }
-                       if (nounit) {
-                           temp /= a[i__ + i__ * a_dim1];
-                       }
-                       b[i__ + j * b_dim1] = temp;
-/* L150: */
-                   }
-/* L160: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*B*inv( A ). */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L170: */
-                       }
-                   }
-                   i__2 = j - 1;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (a[k + j * a_dim1] != 0.f) {
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
-                                       i__ + k * b_dim1];
-/* L180: */
-                           }
-                       }
-/* L190: */
-                   }
-                   if (nounit) {
-                       temp = 1.f / a[j + j * a_dim1];
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L200: */
-                       }
-                   }
-/* L210: */
-               }
-           } else {
-               for (j = *n; j >= 1; --j) {
-                   if (*alpha != 1.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L220: */
-                       }
-                   }
-                   i__1 = *n;
-                   for (k = j + 1; k <= i__1; ++k) {
-                       if (a[k + j * a_dim1] != 0.f) {
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
-                                       i__ + k * b_dim1];
-/* L230: */
-                           }
-                       }
-/* L240: */
-                   }
-                   if (nounit) {
-                       temp = 1.f / a[j + j * a_dim1];
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L250: */
-                       }
-                   }
-/* L260: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*B*inv( A' ). */
-           if (upper) {
-               for (k = *n; k >= 1; --k) {
-                   if (nounit) {
-                       temp = 1.f / a[k + k * a_dim1];
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L270: */
-                       }
-                   }
-                   i__1 = k - 1;
-                   for (j = 1; j <= i__1; ++j) {
-                       if (a[j + k * a_dim1] != 0.f) {
-                           temp = a[j + k * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= temp * b[i__ + k * 
-                                       b_dim1];
-/* L280: */
-                           }
-                       }
-/* L290: */
-                   }
-                   if (*alpha != 1.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
-                                   ;
-/* L300: */
-                       }
-                   }
-/* L310: */
-               }
-           } else {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   if (nounit) {
-                       temp = 1.f / a[k + k * a_dim1];
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L320: */
-                       }
-                   }
-                   i__2 = *n;
-                   for (j = k + 1; j <= i__2; ++j) {
-                       if (a[j + k * a_dim1] != 0.f) {
-                           temp = a[j + k * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= temp * b[i__ + k * 
-                                       b_dim1];
-/* L330: */
-                           }
-                       }
-/* L340: */
-                   }
-                   if (*alpha != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
-                                   ;
-/* L350: */
-                       }
-                   }
-/* L360: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of STRSM . */
-} /* strsm_ */
-/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, integer *n, 
-       real *a, integer *lda, real *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  STRSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("STRSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       if (nounit) {
-                           x[j] /= a[j + j * a_dim1];
-                       }
-                       temp = x[j];
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           x[i__] -= temp * a[i__ + j * a_dim1];
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       if (nounit) {
-                           x[jx] /= a[j + j * a_dim1];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           x[ix] -= temp * a[i__ + j * a_dim1];
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       if (nounit) {
-                           x[j] /= a[j + j * a_dim1];
-                       }
-                       temp = x[j];
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * a[i__ + j * a_dim1];
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       if (nounit) {
-                           x[jx] /= a[j + j * a_dim1];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           x[ix] -= temp * a[i__ + j * a_dim1];
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= a[i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= a[i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= a[i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of STRSV . */
-} /* strsv_ */
-/* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len)
-    /* Format strings */
-    static char fmt_9999[] = "(\002 ** On entry to \002,a6,\002 parameter nu"
-           "mber \002,i2,\002 had \002,\002an illegal value\002)";
-    /* Builtin functions */
-    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
-    /* Subroutine */ int s_stop(char *, ftnlen);
-    /* Fortran I/O blocks */
-    static cilist io___916 = { 0, 6, 0, fmt_9999, 0 };
-/*  -- LAPACK auxiliary routine (preliminary version) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
-/*     Courant Institute, Argonne National Lab, and Rice University */
-/*     February 29, 1992 */
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  XERBLA  is an error handler for the LAPACK routines. */
-/*  It is called by an LAPACK routine if an input parameter has an */
-/*  invalid value.  A message is printed and execution stops. */
-/*  Installers may consider modifying the STOP statement in order to */
-/*  call system-specific exception-handling facilities. */
-/*  Arguments */
-/*  ========= */
-/*  SRNAME  (input) CHARACTER*6 */
-/*          The name of the routine which called XERBLA. */
-/*  INFO    (input) INTEGER */
-/*          The position of the invalid parameter in the parameter list */
-/*          of the calling routine. */
-    s_wsfe(&io___916);
-    do_fio(&c__1, srname, (ftnlen)6);
-    do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer));
-    e_wsfe();
-    s_stop("", (ftnlen)0);
-/*     End of XERBLA */
-    return 0;
-} /* xerbla_ */
-/* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, 
-       integer *incx, doublecomplex *zy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    doublecomplex z__1, z__2;
-    /* Local variables */
-    static integer i__, ix, iy;
-    extern doublereal dcabs1_(doublecomplex *);
-/*     constant times a vector plus a vector. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (dcabs1_(za) == 0.) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = iy;
-       i__3 = iy;
-       i__4 = ix;
-       z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[
-               i__4].i + za->i * zx[i__4].r;
-       z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i;
-       zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*        code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       i__4 = i__;
-       z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[
-               i__4].i + za->i * zx[i__4].r;
-       z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i;
-       zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
-/* L30: */
-    }
-    return 0;
-} /* zaxpy_ */
-/* Subroutine */ int zcopy_(integer *n, doublecomplex *zx, integer *incx, 
-       doublecomplex *zy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    /* Local variables */
-    static integer i__, ix, iy;
-/*     copies a vector, x, to a vector, y. */
-/*     jack dongarra, linpack, 4/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = iy;
-       i__3 = ix;
-       zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*        code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i;
-/* L30: */
-    }
-    return 0;
-} /* zcopy_ */
-/* Double Complex */ VOID zdotc_(doublecomplex * ret_val, integer *n, 
-       doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublecomplex ztemp;
-/*     forms the dot product of a vector. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-    /* Function Body */
-    ztemp.r = 0., ztemp.i = 0.;
-     ret_val->r = 0.,  ret_val->i = 0.;
-    if (*n <= 0) {
-       return ;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       d_cnjg(&z__3, &zx[ix]);
-       i__2 = iy;
-       z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * 
-               zy[i__2].i + z__3.i * zy[i__2].r;
-       z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
-    return ;
-/*        code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       d_cnjg(&z__3, &zx[i__]);
-       i__2 = i__;
-       z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * 
-               zy[i__2].i + z__3.i * zy[i__2].r;
-       z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-/* L30: */
-    }
-     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
-    return ;
-} /* zdotc_ */
-/* Double Complex */ VOID zdotu_(doublecomplex * ret_val, integer *n, 
-       doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    doublecomplex z__1, z__2;
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublecomplex ztemp;
-/*     forms the dot product of two vectors. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-    /* Function Body */
-    ztemp.r = 0., ztemp.i = 0.;
-     ret_val->r = 0.,  ret_val->i = 0.;
-    if (*n <= 0) {
-       return ;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       i__3 = iy;
-       z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = 
-               zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r;
-       z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
-    return ;
-/*        code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = 
-               zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r;
-       z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-/* L30: */
-    }
-     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
-    return ;
-} /* zdotu_ */
-/* Subroutine */ int zdrot_(integer *n, doublecomplex *zx, integer *incx, 
-       doublecomplex *zy, integer *incy, doublereal *c__, doublereal *s)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    doublecomplex z__1, z__2, z__3;
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublecomplex ztemp;
-/*     applies a plane rotation, where the cos and sin (c and s) are */
-/*     double precision and the vectors zx and zy are double complex. */
-/*     jack dongarra, linpack, 3/11/78. */
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       z__2.r = *c__ * zx[i__2].r, z__2.i = *c__ * zx[i__2].i;
-       i__3 = iy;
-       z__3.r = *s * zy[i__3].r, z__3.i = *s * zy[i__3].i;
-       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-       i__2 = iy;
-       i__3 = iy;
-       z__2.r = *c__ * zy[i__3].r, z__2.i = *c__ * zy[i__3].i;
-       i__4 = ix;
-       z__3.r = *s * zx[i__4].r, z__3.i = *s * zx[i__4].i;
-       z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
-       zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
-       i__2 = ix;
-       zx[i__2].r = ztemp.r, zx[i__2].i = ztemp.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*       code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       z__2.r = *c__ * zx[i__2].r, z__2.i = *c__ * zx[i__2].i;
-       i__3 = i__;
-       z__3.r = *s * zy[i__3].r, z__3.i = *s * zy[i__3].i;
-       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-       i__2 = i__;
-       i__3 = i__;
-       z__2.r = *c__ * zy[i__3].r, z__2.i = *c__ * zy[i__3].i;
-       i__4 = i__;
-       z__3.r = *s * zx[i__4].r, z__3.i = *s * zx[i__4].i;
-       z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
-       zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
-       i__2 = i__;
-       zx[i__2].r = ztemp.r, zx[i__2].i = ztemp.i;
-/* L30: */
-    }
-    return 0;
-} /* zdrot_ */
-/* Subroutine */ int zdscal_(integer *n, doublereal *da, doublecomplex *zx, 
-       integer *incx)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    doublecomplex z__1, z__2;
-    /* Local variables */
-    static integer i__, ix;
-/*     scales a vector by a constant. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --zx;
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    ix = 1;
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       z__2.r = *da, z__2.i = 0.;
-       i__3 = ix;
-       z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * 
-               zx[i__3].i + z__2.i * zx[i__3].r;
-       zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
-       ix += *incx;
-/* L10: */
-    }
-    return 0;
-/*        code for increment equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       z__2.r = *da, z__2.i = 0.;
-       i__3 = i__;
-       z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * 
-               zx[i__3].i + z__2.i * zx[i__3].r;
-       zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
-/* L30: */
-    }
-    return 0;
-} /* zdscal_ */
-/* Subroutine */ int zgbmv_(char *trans, integer *m, integer *n, integer *kl, 
-       integer *ku, doublecomplex *alpha, doublecomplex *a, integer *lda, 
-       doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *
-       y, integer *incy, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info;
-    static doublecomplex temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZGBMV  performs one of the matrix-vector operations */
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or */
-/*     y := alpha*conjg( A' )*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n band matrix, with kl sub-diagonals and ku super-diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-/*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  KL     - INTEGER. */
-/*           On entry, KL specifies the number of sub-diagonals of the */
-/*           matrix A. KL must satisfy  0 .le. KL. */
-/*           Unchanged on exit. */
-/*  KU     - INTEGER. */
-/*           On entry, KU specifies the number of super-diagonals of the */
-/*           matrix A. KU must satisfy  0 .le. KU. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading ( kl + ku + 1 ) by n part of the */
-/*           array A must contain the matrix of coefficients, supplied */
-/*           column by column, with the leading diagonal of the matrix in */
-/*           row ( ku + 1 ) of the array, the first super-diagonal */
-/*           starting at position 2 in row ku, the first sub-diagonal */
-/*           starting at position 1 in row ( ku + 2 ), and so on. */
-/*           Elements in the array A that do not correspond to elements */
-/*           in the band matrix (such as the top left ku by ku triangle) */
-/*           are not referenced. */
-/*           The following program segment will transfer a band matrix */
-/*           from conventional full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    K = KU + 1 - J */
-/*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */
-/*                       A( K + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( kl + ku + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*kl < 0) {
-       info = 4;
-    } else if (*ku < 0) {
-       info = 5;
-    } else if (*lda < *kl + *ku + 1) {
-       info = 8;
-    } else if (*incx == 0) {
-       info = 10;
-    } else if (*incy == 0) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("ZGBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 
-           1. && beta->i == 0.)) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the band part of A. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    kup1 = *ku + 1;
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   i__2 = jx;
-                   z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       i__2 = i__;
-                       i__3 = i__;
-                       i__5 = k + i__ + j * a_dim1;
-                       z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + 
-                               z__2.i;
-                       y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__4 = jx;
-               if (x[i__4].r != 0. || x[i__4].i != 0.) {
-                   i__4 = jx;
-                   z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, 
-                           z__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4]
-                           .r;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   iy = ky;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       i__4 = iy;
-                       i__2 = iy;
-                       i__5 = k + i__ + j * a_dim1;
-                       z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + 
-                               z__2.i;
-                       y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               if (j > *ku) {
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0., temp.i = 0.;
-               k = kup1 - j;
-               if (noconj) {
-/* Computing MAX */
-                   i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__2 = min(i__5,i__6);
-                   for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                       i__3 = k + i__ + j * a_dim1;
-                       i__4 = i__;
-                       z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                   }
-               } else {
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       d_cnjg(&z__3, &a[k + i__ + j * a_dim1]);
-                       i__2 = i__;
-                       z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                               z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                   }
-               }
-               i__4 = jy;
-               i__2 = jy;
-               z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-               y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-               jy += *incy;
-/* L110: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0., temp.i = 0.;
-               ix = kx;
-               k = kup1 - j;
-               if (noconj) {
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       i__4 = k + i__ + j * a_dim1;
-                       i__2 = ix;
-                       z__2.r = a[i__4].r * x[i__2].r - a[i__4].i * x[i__2]
-                               .i, z__2.i = a[i__4].r * x[i__2].i + a[i__4]
-                               .i * x[i__2].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       ix += *incx;
-/* L120: */
-                   }
-               } else {
-/* Computing MAX */
-                   i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__2 = min(i__5,i__6);
-                   for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                       d_cnjg(&z__3, &a[k + i__ + j * a_dim1]);
-                       i__3 = ix;
-                       z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                               z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       ix += *incx;
-/* L130: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jy += *incy;
-               if (j > *ku) {
-                   kx += *incx;
-               }
-/* L140: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZGBMV . */
-} /* zgbmv_ */
-/* Subroutine */ int zgemm_(char *transa, char *transb, integer *m, integer *
-       n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, 
-       doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex *
-       c__, integer *ldc, ftnlen transa_len, ftnlen transb_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    doublecomplex z__1, z__2, z__3, z__4;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, l, info;
-    static logical nota, notb;
-    static doublecomplex temp;
-    static logical conja, conjb;
-    static integer ncola;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa, nrowb;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZGEMM  performs one of the matrix-matrix operations */
-/*     C := alpha*op( A )*op( B ) + beta*C, */
-/*  where  op( X ) is one of */
-/*     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ), */
-/*  alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
-/*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n',  op( A ) = A. */
-/*              TRANSA = 'T' or 't',  op( A ) = A'. */
-/*              TRANSA = 'C' or 'c',  op( A ) = conjg( A' ). */
-/*           Unchanged on exit. */
-/*           On entry, TRANSB specifies the form of op( B ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSB = 'N' or 'n',  op( B ) = B. */
-/*              TRANSB = 'T' or 't',  op( B ) = B'. */
-/*              TRANSB = 'C' or 'c',  op( B ) = conjg( B' ). */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies  the number  of rows  of the  matrix */
-/*           op( A )  and of the  matrix  C.  M  must  be at least  zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N  specifies the number  of columns of the matrix */
-/*           op( B ) and the number of columns of the matrix C. N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry,  K  specifies  the number of columns of the matrix */
-/*           op( A ) and the number of rows of the matrix op( B ). K must */
-/*           be at least  zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. */
-/*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by m  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is */
-/*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. */
-/*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  n by k  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then */
-/*           LDB must be at least  max( 1, k ), otherwise  LDB must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n  matrix */
-/*           ( alpha*op( A )*op( B ) + beta*C ). */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not */
-/*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and */
-/*     B  respectively are to be  transposed but  not conjugated  and set */
-/*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A */
-/*     and the number of rows of  B  respectively. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    nota = lsame_(transa, "N", (ftnlen)1, (ftnlen)1);
-    notb = lsame_(transb, "N", (ftnlen)1, (ftnlen)1);
-    conja = lsame_(transa, "C", (ftnlen)1, (ftnlen)1);
-    conjb = lsame_(transb, "C", (ftnlen)1, (ftnlen)1);
-    if (nota) {
-       nrowa = *m;
-       ncola = *k;
-    } else {
-       nrowa = *k;
-       ncola = *m;
-    }
-    if (notb) {
-       nrowb = *k;
-    } else {
-       nrowb = *n;
-    }
-/*     Test the input parameters. */
-    info = 0;
-    if (! nota && ! conja && ! lsame_(transa, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! notb && ! conjb && ! lsame_(transb, "T", (ftnlen)1, (ftnlen)
-           1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < max(1,nrowa)) {
-       info = 8;
-    } else if (*ldb < max(1,nrowb)) {
-       info = 10;
-    } else if (*ldc < max(1,*m)) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("ZGEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) &&
-            (beta->r == 1. && beta->i == 0.)) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (beta->r == 0. && beta->i == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (notb) {
-       if (nota) {
-/*           Form  C := alpha*A*B + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L60: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = l + j * b_dim1;
-                   if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                       i__3 = l + j * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L70: */
-                       }
-                   }
-/* L80: */
-               }
-/* L90: */
-           }
-       } else if (conja) {
-/*           Form  C := alpha*conjg( A' )*B + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, 
-                               z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L110: */
-               }
-/* L120: */
-           }
-       } else {
-/*           Form  C := alpha*A'*B + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L130: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L140: */
-               }
-/* L150: */
-           }
-       }
-    } else if (nota) {
-       if (conjb) {
-/*           Form  C := alpha*A*conjg( B' ) + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L160: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L170: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * b_dim1;
-                   if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                       d_cnjg(&z__2, &b[j + l * b_dim1]);
-                       z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, 
-                               z__1.i = alpha->r * z__2.i + alpha->i * 
-                               z__2.r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L180: */
-                       }
-                   }
-/* L190: */
-               }
-/* L200: */
-           }
-       } else {
-/*           Form  C := alpha*A*B'          + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L210: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L220: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * b_dim1;
-                   if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                       i__3 = j + l * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L230: */
-                       }
-                   }
-/* L240: */
-               }
-/* L250: */
-           }
-       }
-    } else if (conja) {
-       if (conjb) {
-/*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C. */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       d_cnjg(&z__4, &b[j + l * b_dim1]);
-                       z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = 
-                               z__3.r * z__4.i + z__3.i * z__4.r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L260: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L270: */
-               }
-/* L280: */
-           }
-       } else {
-/*           Form  C := alpha*conjg( A' )*B' + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = j + l * b_dim1;
-                       z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, 
-                               z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L290: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L300: */
-               }
-/* L310: */
-           }
-       }
-    } else {
-       if (conjb) {
-/*           Form  C := alpha*A'*conjg( B' ) + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       d_cnjg(&z__3, &b[j + l * b_dim1]);
-                       z__2.r = a[i__4].r * z__3.r - a[i__4].i * z__3.i, 
-                               z__2.i = a[i__4].r * z__3.i + a[i__4].i * 
-                               z__3.r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L320: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L330: */
-               }
-/* L340: */
-           }
-       } else {
-/*           Form  C := alpha*A'*B' + beta*C */
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = j + l * b_dim1;
-                       z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L350: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L360: */
-               }
-/* L370: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZGEMM . */
-} /* zgemm_ */
-/* Subroutine */ int zgemv_(char *trans, integer *m, integer *n, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *
-       incy, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZGEMV  performs one of the matrix-vector operations */
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or */
-/*     y := alpha*conjg( A' )*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-/*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry with BETA non-zero, the incremented array Y */
-/*           must contain the vector y. On exit, Y is overwritten by the */
-/*           updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*lda < max(1,*m)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("ZGEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 
-           1. && beta->i == 0.)) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   i__2 = jx;
-                   z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__;
-                       i__4 = i__;
-                       i__5 = i__ + j * a_dim1;
-                       z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + 
-                               z__2.i;
-                       y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   i__2 = jx;
-                   z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   iy = ky;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = iy;
-                       i__4 = iy;
-                       i__5 = i__ + j * a_dim1;
-                       z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + 
-                               z__2.i;
-                       y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0., temp.i = 0.;
-               if (noconj) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__;
-                       z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                   }
-               } else {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                       i__3 = i__;
-                       z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                               z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jy += *incy;
-/* L110: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0., temp.i = 0.;
-               ix = kx;
-               if (noconj) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = ix;
-                       z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       ix += *incx;
-/* L120: */
-                   }
-               } else {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                       i__3 = ix;
-                       z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                               z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       ix += *incx;
-/* L130: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jy += *incy;
-/* L140: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZGEMV . */
-} /* zgemv_ */
-/* Subroutine */ int zgerc_(integer *m, integer *n, doublecomplex *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
-       doublecomplex *a, integer *lda)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static doublecomplex temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZGERC  performs the rank 1 operation */
-/*     A := alpha*x*conjg( y' ) + A, */
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZGERC ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0. || y[i__2].i != 0.) {
-               d_cnjg(&z__2, &y[jy]);
-               z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                       alpha->r * z__2.i + alpha->i * z__2.r;
-               temp.r = z__1.r, temp.i = z__1.i;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = i__;
-                   z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
-                   a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0. || y[i__2].i != 0.) {
-               d_cnjg(&z__2, &y[jy]);
-               z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                       alpha->r * z__2.i + alpha->i * z__2.r;
-               temp.r = z__1.r, temp.i = z__1.i;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = ix;
-                   z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
-                   a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-    return 0;
-/*     End of ZGERC . */
-} /* zgerc_ */
-/* Subroutine */ int zgeru_(integer *m, integer *n, doublecomplex *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
-       doublecomplex *a, integer *lda)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2;
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static doublecomplex temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZGERU  performs the rank 1 operation */
-/*     A := alpha*x*y' + A, */
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-/*  Parameters */
-/*  ========== */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZGERU ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0. || y[i__2].i != 0.) {
-               i__2 = jy;
-               z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i =
-                        alpha->r * y[i__2].i + alpha->i * y[i__2].r;
-               temp.r = z__1.r, temp.i = z__1.i;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = i__;
-                   z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
-                   a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0. || y[i__2].i != 0.) {
-               i__2 = jy;
-               z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i =
-                        alpha->r * y[i__2].i + alpha->i * y[i__2].r;
-               temp.r = z__1.r, temp.i = z__1.i;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = ix;
-                   z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
-                   a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-    return 0;
-/*     End of ZGERU . */
-} /* zgeru_ */
-/* Subroutine */ int zhbmv_(char *uplo, integer *n, integer *k, doublecomplex 
-       *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *
-       incx, doublecomplex *beta, doublecomplex *y, integer *incy, ftnlen 
-       uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHBMV  performs the matrix-vector  operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian band matrix, with k super-diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*k < 0) {
-       info = 3;
-    } else if (*lda < *k + 1) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("ZHBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && 
-           beta->i == 0.)) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when upper triangle of A is stored. */
-       kplus1 = *k + 1;
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *k;
-               i__4 = j - 1;
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   i__5 = l + i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                   i__2 = i__;
-                   z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i =
-                            z__3.r * x[i__2].i + z__3.i * x[i__2].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L50: */
-               }
-               i__4 = j;
-               i__2 = j;
-               i__3 = kplus1 + j * a_dim1;
-               d__1 = a[i__3].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__4 = jx;
-               z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i =
-                        alpha->r * x[i__4].i + alpha->i * x[i__4].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               ix = kx;
-               iy = ky;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__4 = 1, i__2 = j - *k;
-               i__3 = j - 1;
-               for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                   i__4 = iy;
-                   i__2 = iy;
-                   i__5 = l + i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-                   y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-                   d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = ix;
-                   z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
-                            z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__3 = jy;
-               i__4 = jy;
-               i__2 = kplus1 + j * a_dim1;
-               d__1 = a[i__2].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-               if (j > *k) {
-                   kx += *incx;
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when lower triangle of A is stored. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__3 = j;
-               z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i =
-                        alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__3 = j;
-               i__4 = j;
-               i__2 = j * a_dim1 + 1;
-               d__1 = a[i__2].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-               l = 1 - j;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   i__4 = i__;
-                   i__2 = i__;
-                   i__5 = l + i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-                   y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-                   d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = i__;
-                   z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
-                            z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L90: */
-               }
-               i__3 = j;
-               i__4 = j;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__3 = jx;
-               z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i =
-                        alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__3 = jy;
-               i__4 = jy;
-               i__2 = j * a_dim1 + 1;
-               d__1 = a[i__2].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-               l = 1 - j;
-               ix = jx;
-               iy = jy;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__4 = iy;
-                   i__2 = iy;
-                   i__5 = l + i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-                   y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-                   d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = ix;
-                   z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
-                            z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L110: */
-               }
-               i__3 = jy;
-               i__4 = jy;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZHBMV . */
-} /* zhbmv_ */
-/* Subroutine */ int zhemm_(char *side, char *uplo, integer *m, integer *n, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer *
-       ldc, ftnlen side_len, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4, z__5;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHEMM  performs one of the matrix-matrix operations */
-/*     C := alpha*A*B + beta*C, */
-/*  or */
-/*     C := alpha*B*A + beta*C, */
-/*  where alpha and beta are scalars, A is an hermitian matrix and  B and */
-/*  C are m by n matrices. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  hermitian matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  hermitian  matrix   A  is  to  be */
-/*           referenced as follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  hermitian matrix is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  hermitian matrix is to be referenced. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is n  otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  hermitian matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  hermitian matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  hermitian */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  hermitian matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  hermitian matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  hermitian */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Note that the imaginary parts  of the diagonal elements need */
-/*           not be set, they are assumed to be zero. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set NROWA as the number of rows of A. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-/*     Test the input parameters. */
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("ZHEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 
-           1. && beta->i == 0.)) {
-       return 0;
-    }
-/*     And when  alpha.eq.zero. */
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (beta->r == 0. && beta->i == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * b_dim1;
-                   z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                           z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + j * c_dim1;
-                       i__6 = k + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, 
-                               z__2.i = temp1.r * a[i__6].i + temp1.i * a[
-                               i__6].r;
-                       z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                               z__2.i;
-                       c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-                       i__4 = k + j * b_dim1;
-                       d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                       z__2.r = b[i__4].r * z__3.r - b[i__4].i * z__3.i, 
-                               z__2.i = b[i__4].r * z__3.i + b[i__4].i * 
-                               z__3.r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L50: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + i__ * a_dim1;
-                       d__1 = a[i__4].r;
-                       z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       i__5 = i__ + i__ * a_dim1;
-                       d__1 = a[i__5].r;
-                       z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   i__2 = i__ + j * b_dim1;
-                   z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                           z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       i__3 = k + j * c_dim1;
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               z__2.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       z__1.r = c__[i__4].r + z__2.r, z__1.i = c__[i__4].i + 
-                               z__2.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       i__3 = k + j * b_dim1;
-                       d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                       z__2.r = b[i__3].r * z__3.r - b[i__3].i * z__3.i, 
-                               z__2.i = b[i__3].r * z__3.i + b[i__3].i * 
-                               z__3.r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L80: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + i__ * a_dim1;
-                       d__1 = a[i__3].r;
-                       z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
-                   } else {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3]
-                               .i, z__3.i = beta->r * c__[i__3].i + beta->i *
-                                c__[i__3].r;
-                       i__4 = i__ + i__ * a_dim1;
-                       d__1 = a[i__4].r;
-                       z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*B*A + beta*C. */
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = j + j * a_dim1;
-           d__1 = a[i__2].r;
-           z__1.r = d__1 * alpha->r, z__1.i = d__1 * alpha->i;
-           temp1.r = z__1.r, temp1.i = z__1.i;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * b_dim1;
-                   z__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, 
-                           z__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4]
-                           .r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__2.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   i__5 = i__ + j * b_dim1;
-                   z__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, 
-                           z__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5]
-                           .r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = k + j * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               } else {
-                   d_cnjg(&z__2, &a[j + k * a_dim1]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                           z__2.i;
-                   c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   d_cnjg(&z__2, &a[j + k * a_dim1]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               } else {
-                   i__3 = k + j * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                           z__2.i;
-                   c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-    return 0;
-/*     End of ZHEMM . */
-} /* zhemm_ */
-/* Subroutine */ int zhemv_(char *uplo, integer *n, doublecomplex *alpha, 
-       doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, 
-       doublecomplex *beta, doublecomplex *y, integer *incy, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHEMV  performs the matrix-vector  operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*lda < max(1,*n)) {
-       info = 5;
-    } else if (*incx == 0) {
-       info = 7;
-    } else if (*incy == 0) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("ZHEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && 
-           beta->i == 0.)) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when A is stored in upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                   i__3 = i__;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L50: */
-               }
-               i__2 = j;
-               i__3 = j;
-               i__4 = j + j * a_dim1;
-               d__1 = a[i__4].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               ix = kx;
-               iy = ky;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                   i__3 = ix;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = j + j * a_dim1;
-               d__1 = a[i__4].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when A is stored in lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = j;
-               i__3 = j;
-               i__4 = j + j * a_dim1;
-               d__1 = a[i__4].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                   i__3 = i__;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L90: */
-               }
-               i__2 = j;
-               i__3 = j;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = j + j * a_dim1;
-               d__1 = a[i__4].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               ix = jx;
-               iy = jy;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                   i__3 = ix;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L110: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZHEMV . */
-} /* zhemv_ */
-/* Subroutine */ int zher_(char *uplo, integer *n, doublereal *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *a, integer *lda, 
-       ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHER   performs the hermitian rank 1 operation */
-/*     A := alpha*x*conjg( x' ) + A, */
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n hermitian matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*lda < max(1,*n)) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("ZHER  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-/*     Set the start point in X if the increment is not unity. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when A is stored in upper triangle. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[j]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + 
-                               z__2.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L10: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[jx]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   ix = kx;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + 
-                               z__2.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-                       ix += *incx;
-/* L30: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-               jx += *incx;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when A is stored in lower triangle. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[j]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + 
-                               z__2.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L50: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[jx]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-                   ix = jx;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + 
-                               z__2.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZHER  . */
-} /* zher_ */
-/* Subroutine */ int zher2_(char *uplo, integer *n, doublecomplex *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
-       doublecomplex *a, integer *lda, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHER2  performs the hermitian rank 2 operation */
-/*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */
-/*  where alpha is a scalar, x and y are n element vectors and A is an n */
-/*  by n hermitian matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*n)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZHER2 ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when A is stored in the upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[j]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = j;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + 
-                               z__3.i;
-                       i__6 = i__;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L10: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[jy]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = jx;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix = kx;
-                   iy = ky;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + 
-                               z__3.i;
-                       i__6 = iy;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-               jx += *incx;
-               jy += *incy;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when A is stored in the lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[j]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = j;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + 
-                               z__3.i;
-                       i__6 = i__;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L50: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[jy]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = jx;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-                   ix = jx;
-                   iy = jy;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       iy += *incy;
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + 
-                               z__3.i;
-                       i__6 = iy;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZHER2 . */
-} /* zher2_ */
-/* Subroutine */ int zher2k_(char *uplo, char *trans, integer *n, integer *k, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc, 
-       ftnlen uplo_len, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6, i__7;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4, z__5, z__6;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHER2K  performs one of the hermitian rank 2k operations */
-/*     C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, */
-/*  or */
-/*     C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, */
-/*  where  alpha and beta  are scalars with  beta  real,  C is an  n by n */
-/*  hermitian matrix and  A and B  are  n by k matrices in the first case */
-/*  and  k by n  matrices in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          + */
-/*                                         conjg( alpha )*B*conjg( A' ) + */
-/*                                         beta*C. */
-/*              TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          + */
-/*                                         conjg( alpha )*conjg( B' )*A + */
-/*                                         beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
-/*           matrices  A and B.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - DOUBLE PRECISION            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX*16          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set,  they are assumed to be zero,  and on exit they */
-/*           are set to zero. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. */
-/*     Ed Anderson, Cray Research Inc. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "C", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("ZHER2K", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && *beta == 
-           1.) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (upper) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + */
-/*                   C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L90: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L100: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 
-                           0. || b[i__4].i != 0.)) {
-                       d_cnjg(&z__2, &b[j + l * b_dim1]);
-                       z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, 
-                               z__1.i = alpha->r * z__2.i + alpha->i * 
-                               z__2.r;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__3 = j + l * a_dim1;
-                       z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__2.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       d_cnjg(&z__1, &z__2);
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-                       i__3 = j - 1;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, z__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
-                                   .i + z__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           z__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, z__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + 
-                                   z__4.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L110: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, 
-                               z__2.i = a[i__5].r * temp1.i + a[i__5].i * 
-                               temp1.r;
-                       i__6 = j + l * b_dim1;
-                       z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, 
-                               z__3.i = b[i__6].r * temp2.i + b[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       d__1 = c__[i__4].r + z__1.r;
-                       c__[i__3].r = d__1, c__[i__3].i = 0.;
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L140: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L150: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 
-                           0. || b[i__4].i != 0.)) {
-                       d_cnjg(&z__2, &b[j + l * b_dim1]);
-                       z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, 
-                               z__1.i = alpha->r * z__2.i + alpha->i * 
-                               z__2.r;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__3 = j + l * a_dim1;
-                       z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__2.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       d_cnjg(&z__1, &z__2);
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-                       i__3 = *n;
-                       for (i__ = j + 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, z__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
-                                   .i + z__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           z__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, z__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + 
-                                   z__4.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L160: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, 
-                               z__2.i = a[i__5].r * temp1.i + a[i__5].i * 
-                               temp1.r;
-                       i__6 = j + l * b_dim1;
-                       z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, 
-                               z__3.i = b[i__6].r * temp2.i + b[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       d__1 = c__[i__4].r + z__1.r;
-                       c__[i__3].r = d__1, c__[i__3].i = 0.;
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + */
-/*                   C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1.r = 0., temp1.i = 0.;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, 
-                               z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
-                               .r;
-                       z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       d_cnjg(&z__3, &b[l + i__ * b_dim1]);
-                       i__4 = l + j * a_dim1;
-                       z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, 
-                               z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
-                               .r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L190: */
-                   }
-                   if (i__ == j) {
-                       if (*beta == 0.) {
-                           i__3 = j + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           d__1 = z__1.r;
-                           c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       } else {
-                           i__3 = j + j * c_dim1;
-                           i__4 = j + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           d__1 = *beta * c__[i__4].r + z__1.r;
-                           c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       }
-                   } else {
-                       if (*beta == 0.) {
-                           i__3 = i__ + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       } else {
-                           i__3 = i__ + j * c_dim1;
-                           i__4 = i__ + j * c_dim1;
-                           z__3.r = *beta * c__[i__4].r, z__3.i = *beta * 
-                                   c__[i__4].i;
-                           z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__4.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + 
-                                   z__4.i;
-                           d_cnjg(&z__6, alpha);
-                           z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, 
-                                   z__5.i = z__6.r * temp2.i + z__6.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + 
-                                   z__5.i;
-                           c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       }
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1.r = 0., temp1.i = 0.;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, 
-                               z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
-                               .r;
-                       z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       d_cnjg(&z__3, &b[l + i__ * b_dim1]);
-                       i__4 = l + j * a_dim1;
-                       z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, 
-                               z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
-                               .r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L220: */
-                   }
-                   if (i__ == j) {
-                       if (*beta == 0.) {
-                           i__3 = j + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           d__1 = z__1.r;
-                           c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       } else {
-                           i__3 = j + j * c_dim1;
-                           i__4 = j + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           d__1 = *beta * c__[i__4].r + z__1.r;
-                           c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       }
-                   } else {
-                       if (*beta == 0.) {
-                           i__3 = i__ + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       } else {
-                           i__3 = i__ + j * c_dim1;
-                           i__4 = i__ + j * c_dim1;
-                           z__3.r = *beta * c__[i__4].r, z__3.i = *beta * 
-                                   c__[i__4].i;
-                           z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__4.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + 
-                                   z__4.i;
-                           d_cnjg(&z__6, alpha);
-                           z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, 
-                                   z__5.i = z__6.r * temp2.i + z__6.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + 
-                                   z__5.i;
-                           c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       }
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZHER2K. */
-} /* zher2k_ */
-/* Subroutine */ int zherk_(char *uplo, char *trans, integer *n, integer *k, 
-       doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, 
-       doublecomplex *c__, integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static doublereal rtemp;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHERK  performs one of the hermitian rank k operations */
-/*     C := alpha*A*conjg( A' ) + beta*C, */
-/*  or */
-/*     C := alpha*conjg( A' )*A + beta*C, */
-/*  where  alpha and beta  are  real scalars,  C is an  n by n  hermitian */
-/*  matrix and  A  is an  n by k  matrix in the  first case and a  k by n */
-/*  matrix in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C. */
-/*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
-/*           matrix A.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - DOUBLE PRECISION            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX*16          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set,  they are assumed to be zero,  and on exit they */
-/*           are set to zero. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. */
-/*     Ed Anderson, Cray Research Inc. */
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "C", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("ZHERK ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
-       return 0;
-    }
-/*     And when */
-    if (*alpha == 0.) {
-       if (upper) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*conjg( A' ) + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L90: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L100: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                       d_cnjg(&z__2, &a[j + l * a_dim1]);
-                       z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = j - 1;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L110: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = i__ + l * a_dim1;
-                       z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       d__1 = c__[i__4].r + z__1.r;
-                       c__[i__3].r = d__1, c__[i__3].i = 0.;
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L140: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L150: */
-                   }
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                       d_cnjg(&z__2, &a[j + l * a_dim1]);
-                       z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       d__1 = c__[i__4].r + z__1.r;
-                       c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       i__3 = *n;
-                       for (i__ = j + 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*conjg( A' )*A + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * a_dim1;
-                       z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, 
-                               z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L190: */
-                   }
-                   if (*beta == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[
-                               i__4].i;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L200: */
-               }
-               rtemp = 0.;
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   d_cnjg(&z__3, &a[l + j * a_dim1]);
-                   i__3 = l + j * a_dim1;
-                   z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i =
-                            z__3.r * a[i__3].i + z__3.i * a[i__3].r;
-                   z__1.r = rtemp + z__2.r, z__1.i = z__2.i;
-                   rtemp = z__1.r;
-/* L210: */
-               }
-               if (*beta == 0.) {
-                   i__2 = j + j * c_dim1;
-                   d__1 = *alpha * rtemp;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *alpha * rtemp + *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-/* L220: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               rtemp = 0.;
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   d_cnjg(&z__3, &a[l + j * a_dim1]);
-                   i__3 = l + j * a_dim1;
-                   z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i =
-                            z__3.r * a[i__3].i + z__3.i * a[i__3].r;
-                   z__1.r = rtemp + z__2.r, z__1.i = z__2.i;
-                   rtemp = z__1.r;
-/* L230: */
-               }
-               if (*beta == 0.) {
-                   i__2 = j + j * c_dim1;
-                   d__1 = *alpha * rtemp;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *alpha * rtemp + *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * a_dim1;
-                       z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, 
-                               z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L240: */
-                   }
-                   if (*beta == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[
-                               i__4].i;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L250: */
-               }
-/* L260: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZHERK . */
-} /* zherk_ */
-/* Subroutine */ int zhpmv_(char *uplo, integer *n, doublecomplex *alpha, 
-       doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex *
-       beta, doublecomplex *y, integer *incy, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHPMV  performs the matrix-vector operation */
-/*     y := alpha*A*x + beta*y, */
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 6;
-    } else if (*incy == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZHPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && 
-           beta->i == 0.)) {
-       return 0;
-    }
-/*     Set up the start points in  X  and  Y. */
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-/*     First form  y := beta*y. */
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  y  when AP contains the upper triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               k = kk;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = k;
-                   z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &ap[k]);
-                   i__3 = i__;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ++k;
-/* L50: */
-               }
-               i__2 = j;
-               i__3 = j;
-               i__4 = kk + j - 1;
-               d__1 = ap[i__4].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               kk += j;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               ix = kx;
-               iy = ky;
-               i__2 = kk + j - 2;
-               for (k = kk; k <= i__2; ++k) {
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = k;
-                   z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &ap[k]);
-                   i__3 = ix;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = kk + j - 1;
-               d__1 = ap[i__4].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L80: */
-           }
-       }
-    } else {
-/*        Form  y  when AP contains the lower triangle. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = j;
-               i__3 = j;
-               i__4 = kk;
-               d__1 = ap[i__4].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               k = kk + 1;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = k;
-                   z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &ap[k]);
-                   i__3 = i__;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ++k;
-/* L90: */
-               }
-               i__2 = j;
-               i__3 = j;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               kk += *n - j + 1;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = kk;
-               d__1 = ap[i__4].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               ix = jx;
-               iy = jy;
-               i__2 = kk + *n - j;
-               for (k = kk + 1; k <= i__2; ++k) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = k;
-                   z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &ap[k]);
-                   i__3 = ix;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L110: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-               kk += *n - j + 1;
-/* L120: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZHPMV . */
-} /* zhpmv_ */
-/* Subroutine */ int zhpr_(char *uplo, integer *n, doublereal *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *ap, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHPR    performs the hermitian rank 1 operation */
-/*     A := alpha*x*conjg( x' ) + A, */
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n hermitian matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --ap;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    }
-    if (info != 0) {
-       xerbla_("ZHPR  ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-/*     Set the start point in X if the increment is not unity. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when upper triangle is stored in AP. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[j]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + 
-                               z__2.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ++k;
-/* L10: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = j;
-                   z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[jx]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   ix = kx;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + 
-                               z__2.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ix += *incx;
-/* L30: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = jx;
-                   z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               jx += *incx;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when lower triangle is stored in AP. */
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[j]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = j;
-                   z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + 
-                               z__2.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ++k;
-/* L50: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[jx]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = jx;
-                   z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-                   ix = jx;
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + 
-                               z__2.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               jx += *incx;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZHPR  . */
-} /* zhpr_ */
-/* Subroutine */ int zhpr2_(char *uplo, integer *n, doublecomplex *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
-       doublecomplex *ap, ftnlen uplo_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5, i__6;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZHPR2  performs the hermitian rank 2 operation */
-/*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */
-/*  where alpha is a scalar, x and y are n element vectors and A is an */
-/*  n by n hermitian matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --ap;
-    --y;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("ZHPR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-/*        Form  A  when upper triangle is stored in AP. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[j]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = j;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = ap[i__4].r + z__3.r, z__2.i = ap[i__4].i + 
-                               z__3.i;
-                       i__6 = i__;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ++k;
-/* L10: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = j;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[jy]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = jx;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix = kx;
-                   iy = ky;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = ap[i__4].r + z__3.r, z__2.i = ap[i__4].i + 
-                               z__3.i;
-                       i__6 = iy;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = jx;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-/*        Form  A  when lower triangle is stored in AP. */
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[j]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = j;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = j;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = ap[i__4].r + z__3.r, z__2.i = ap[i__4].i + 
-                               z__3.i;
-                       i__6 = i__;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ++k;
-/* L50: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[jy]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = jx;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = jx;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-                   ix = jx;
-                   iy = jy;
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       iy += *incy;
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = ap[i__4].r + z__3.r, z__2.i = ap[i__4].i + 
-                               z__3.i;
-                       i__6 = iy;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               jx += *incx;
-               jy += *incy;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZHPR2 . */
-} /* zhpr2_ */
-/* Subroutine */ int zrotg_(doublecomplex *ca, doublecomplex *cb, doublereal *
-       c__, doublecomplex *s)
-    /* System generated locals */
-    doublereal d__1, d__2;
-    doublecomplex z__1, z__2, z__3, z__4;
-    /* Builtin functions */
-    double z_abs(doublecomplex *);
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
-    double sqrt(doublereal);
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static doublereal norm;
-    static doublecomplex alpha;
-    static doublereal scale;
-    if (z_abs(ca) != 0.) {
-       goto L10;
-    }
-    *c__ = 0.;
-    s->r = 1., s->i = 0.;
-    ca->r = cb->r, ca->i = cb->i;
-    goto L20;
-    scale = z_abs(ca) + z_abs(cb);
-    z__2.r = scale, z__2.i = 0.;
-    z_div(&z__1, ca, &z__2);
-/* Computing 2nd power */
-    d__1 = z_abs(&z__1);
-    z__4.r = scale, z__4.i = 0.;
-    z_div(&z__3, cb, &z__4);
-/* Computing 2nd power */
-    d__2 = z_abs(&z__3);
-    norm = scale * sqrt(d__1 * d__1 + d__2 * d__2);
-    d__1 = z_abs(ca);
-    z__1.r = ca->r / d__1, z__1.i = ca->i / d__1;
-    alpha.r = z__1.r, alpha.i = z__1.i;
-    *c__ = z_abs(ca) / norm;
-    d_cnjg(&z__3, cb);
-    z__2.r = alpha.r * z__3.r - alpha.i * z__3.i, z__2.i = alpha.r * z__3.i + 
-           alpha.i * z__3.r;
-    z__1.r = z__2.r / norm, z__1.i = z__2.i / norm;
-    s->r = z__1.r, s->i = z__1.i;
-    z__1.r = norm * alpha.r, z__1.i = norm * alpha.i;
-    ca->r = z__1.r, ca->i = z__1.i;
-    return 0;
-} /* zrotg_ */
-/* Subroutine */ int zscal_(integer *n, doublecomplex *za, doublecomplex *zx, 
-       integer *incx)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    doublecomplex z__1;
-    /* Local variables */
-    static integer i__, ix;
-/*     scales a vector by a constant. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --zx;
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-/*        code for increment not equal to 1 */
-    ix = 1;
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       i__3 = ix;
-       z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[
-               i__3].i + za->i * zx[i__3].r;
-       zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
-       ix += *incx;
-/* L10: */
-    }
-    return 0;
-/*        code for increment equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[
-               i__3].i + za->i * zx[i__3].r;
-       zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
-/* L30: */
-    }
-    return 0;
-} /* zscal_ */
-/* Subroutine */ int zswap_(integer *n, doublecomplex *zx, integer *incx, 
-       doublecomplex *zy, integer *incy)
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublecomplex ztemp;
-/*     interchanges two vectors. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i;
-       i__2 = ix;
-       i__3 = iy;
-       zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i;
-       i__2 = iy;
-       zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-/*       code for both increments equal to 1 */
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i;
-       i__2 = i__;
-       i__3 = i__;
-       zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i;
-       i__2 = i__;
-       zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i;
-/* L30: */
-    }
-    return 0;
-} /* zswap_ */
-/* Subroutine */ int zsymm_(char *side, char *uplo, integer *m, integer *n, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer *
-       ldc, ftnlen side_len, ftnlen uplo_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    doublecomplex z__1, z__2, z__3, z__4, z__5;
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZSYMM  performs one of the matrix-matrix operations */
-/*     C := alpha*A*B + beta*C, */
-/*  or */
-/*     C := alpha*B*A + beta*C, */
-/*  where  alpha and beta are scalars, A is a symmetric matrix and  B and */
-/*  C are m by n matrices. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  symmetric matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  symmetric  matrix   A  is  to  be */
-/*           referenced as follows: */
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is n  otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Set NROWA as the number of rows of A. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-/*     Test the input parameters. */
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("ZSYMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 
-           1. && beta->i == 0.)) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (beta->r == 0. && beta->i == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * b_dim1;
-                   z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                           z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + j * c_dim1;
-                       i__6 = k + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, 
-                               z__2.i = temp1.r * a[i__6].i + temp1.i * a[
-                               i__6].r;
-                       z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                               z__2.i;
-                       c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-                       i__4 = k + j * b_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       z__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, z__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L50: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, 
-                               z__2.i = temp1.r * a[i__4].i + temp1.i * a[
-                               i__4].r;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       i__5 = i__ + i__ * a_dim1;
-                       z__4.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               z__4.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   i__2 = i__ + j * b_dim1;
-                   z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                           z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       i__3 = k + j * c_dim1;
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               z__2.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       z__1.r = c__[i__4].r + z__2.r, z__1.i = c__[i__4].i + 
-                               z__2.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       i__3 = k + j * b_dim1;
-                       i__4 = k + i__ * a_dim1;
-                       z__2.r = b[i__3].r * a[i__4].r - b[i__3].i * a[i__4]
-                               .i, z__2.i = b[i__3].r * a[i__4].i + b[i__3]
-                               .i * a[i__4].r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L80: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__3].r - temp1.i * a[i__3].i, 
-                               z__2.i = temp1.r * a[i__3].i + temp1.i * a[
-                               i__3].r;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
-                   } else {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3]
-                               .i, z__3.i = beta->r * c__[i__3].i + beta->i *
-                                c__[i__3].r;
-                       i__4 = i__ + i__ * a_dim1;
-                       z__4.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, 
-                               z__4.i = temp1.r * a[i__4].i + temp1.i * a[
-                               i__4].r;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*B*A + beta*C. */
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = j + j * a_dim1;
-           z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2].i, z__1.i = 
-                   alpha->r * a[i__2].i + alpha->i * a[i__2].r;
-           temp1.r = z__1.r, temp1.i = z__1.i;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * b_dim1;
-                   z__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, 
-                           z__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4]
-                           .r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__2.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   i__5 = i__ + j * b_dim1;
-                   z__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, 
-                           z__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5]
-                           .r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = k + j * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               } else {
-                   i__3 = j + k * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                           z__2.i;
-                   c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = j + k * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               } else {
-                   i__3 = k + j * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                           z__2.i;
-                   c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-    return 0;
-/*     End of ZSYMM . */
-} /* zsymm_ */
-/* Subroutine */ int zsyr2k_(char *uplo, char *trans, integer *n, integer *k, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer *
-       ldc, ftnlen uplo_len, ftnlen trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6, i__7;
-    doublecomplex z__1, z__2, z__3, z__4, z__5;
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZSYR2K  performs one of the symmetric rank 2k operations */
-/*     C := alpha*A*B' + alpha*B*A' + beta*C, */
-/*  or */
-/*     C := alpha*A'*B + alpha*B'*A + beta*C, */
-/*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix */
-/*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n */
-/*  matrices in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'    C := alpha*A*B' + alpha*B*A' + */
-/*                                         beta*C. */
-/*              TRANS = 'T' or 't'    C := alpha*A'*B + alpha*B'*A + */
-/*                                         beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'T' or 't',  K  specifies  the number of rows of the */
-/*           matrices  A and B.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("ZSYR2K", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && (beta->r 
-           == 1. && beta->i == 0.)) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (upper) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L90: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 
-                           0. || b[i__4].i != 0.)) {
-                       i__3 = j + l * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__3 = j + l * a_dim1;
-                       z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, z__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
-                                   .i + z__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           z__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, z__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + 
-                                   z__4.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L140: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 
-                           0. || b[i__4].i != 0.)) {
-                       i__3 = j + l * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__3 = j + l * a_dim1;
-                       z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, z__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
-                                   .i + z__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           z__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, z__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + 
-                                   z__4.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1.r = 0., temp1.i = 0.;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__4 = l + i__ * b_dim1;
-                       i__5 = l + j * a_dim1;
-                       z__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, z__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L190: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               z__2.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               z__4.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1.r = 0., temp1.i = 0.;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__4 = l + i__ * b_dim1;
-                       i__5 = l + j * a_dim1;
-                       z__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, z__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L220: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               z__2.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               z__4.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZSYR2K. */
-} /* zsyr2k_ */
-/* Subroutine */ int zsyrk_(char *uplo, char *trans, integer *n, integer *k, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       beta, doublecomplex *c__, integer *ldc, ftnlen uplo_len, ftnlen 
-       trans_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    doublecomplex z__1, z__2, z__3;
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZSYRK  performs one of the symmetric rank k operations */
-/*     C := alpha*A*A' + beta*C, */
-/*  or */
-/*     C := alpha*A'*A + beta*C, */
-/*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix */
-/*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix */
-/*  in the second case. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */
-/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'T' or 't',  K  specifies  the number of rows of the */
-/*           matrix A.  K must be at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("ZSYRK ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && (beta->r 
-           == 1. && beta->i == 0.)) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (upper) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  C := alpha*A*A' + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L90: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                       i__3 = j + l * a_dim1;
-                       z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L140: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                       i__3 = j + l * a_dim1;
-                       z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-/*        Form  C := alpha*A'*A + beta*C. */
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * a_dim1;
-                       z__2.r = a[i__4].r * a[i__5].r - a[i__4].i * a[i__5]
-                               .i, z__2.i = a[i__4].r * a[i__5].i + a[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L190: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * a_dim1;
-                       z__2.r = a[i__4].r * a[i__5].r - a[i__4].i * a[i__5]
-                               .i, z__2.i = a[i__4].r * a[i__5].i + a[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L220: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-    return 0;
-/*     End of ZSYRK . */
-} /* zsyrk_ */
-/* Subroutine */ int ztbmv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer 
-       *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZTBMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZTBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*         Form  x := A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__5 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__4 = j;
-                           i__2 = j;
-                           i__3 = kplus1 + j * a_dim1;
-                           z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, z__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__4 = jx;
-                   if (x[i__4].r != 0. || x[i__4].i != 0.) {
-                       i__4 = jx;
-                       temp.r = x[i__4].r, temp.i = x[i__4].i;
-                       ix = kx;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           i__4 = ix;
-                           i__2 = ix;
-                           i__5 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i + 
-                                   z__2.i;
-                           x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__3 = jx;
-                           i__4 = jx;
-                           i__2 = kplus1 + j * a_dim1;
-                           z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[
-                                   i__2].i, z__1.i = x[i__4].r * a[i__2].i + 
-                                   x[i__4].i * a[i__2].r;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                       }
-                   }
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__1 = *n, i__3 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                           i__1 = i__;
-                           i__3 = i__;
-                           i__2 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__2.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__4 = j;
-                           i__1 = j;
-                           i__3 = j * a_dim1 + 1;
-                           z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[
-                                   i__3].i, z__1.i = x[i__1].r * a[i__3].i + 
-                                   x[i__1].i * a[i__3].r;
-                           x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__4 = jx;
-                   if (x[i__4].r != 0. || x[i__4].i != 0.) {
-                       i__4 = jx;
-                       temp.r = x[i__4].r, temp.i = x[i__4].i;
-                       ix = kx;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__4 = *n, i__1 = j + *k;
-                       i__3 = j + 1;
-                       for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                           i__4 = ix;
-                           i__1 = ix;
-                           i__2 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__2.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i + 
-                                   z__2.i;
-                           x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__3 = jx;
-                           i__4 = jx;
-                           i__1 = j * a_dim1 + 1;
-                           z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[
-                                   i__1].i, z__1.i = x[i__4].r * a[i__1].i + 
-                                   x[i__4].i * a[i__1].r;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                       }
-                   }
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__3 = j;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   l = kplus1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__3 = kplus1 + j * a_dim1;
-                           z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   z__1.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__1 = i__;
-                           z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-                                   i__1].i, z__2.i = a[i__4].r * x[i__1].i + 
-                                   a[i__4].i * x[i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = i__;
-                           z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, 
-                                   z__2.i = z__3.r * x[i__4].i + z__3.i * x[
-                                   i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                       }
-                   }
-                   i__3 = j;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__3 = jx;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   kx -= *incx;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__3 = kplus1 + j * a_dim1;
-                           z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   z__1.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__1 = ix;
-                           z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-                                   i__1].i, z__2.i = a[i__4].r * x[i__1].i + 
-                                   a[i__4].i * x[i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = ix;
-                           z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, 
-                                   z__2.i = z__3.r * x[i__4].i + z__3.i * x[
-                                   i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L130: */
-                       }
-                   }
-                   i__3 = jx;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-                   jx -= *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   i__4 = j;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   l = 1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__4 = j * a_dim1 + 1;
-                           z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__1.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__2 = i__;
-                           z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = i__;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                       }
-                   }
-                   i__4 = j;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   i__4 = jx;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   kx += *incx;
-                   ix = kx;
-                   l = 1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__4 = j * a_dim1 + 1;
-                           z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__1.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__2 = ix;
-                           z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = ix;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L190: */
-                       }
-                   }
-                   i__4 = jx;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-                   jx += *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of ZTBMV . */
-} /* ztbmv_ */
-/* Subroutine */ int ztbsv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer 
-       *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
-           doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZTBSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular band matrix, with ( k + 1 ) */
-/*  diagonals. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZTBSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed by sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       l = kplus1 - j;
-                       if (nounit) {
-                           i__1 = j;
-                           z_div(&z__1, &x[j], &a[kplus1 + j * a_dim1]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           z__1.r = x[i__3].r - z__2.r, z__1.i = x[i__3].i - 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   kx -= *incx;
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       ix = kx;
-                       l = kplus1 - j;
-                       if (nounit) {
-                           i__1 = jx;
-                           z_div(&z__1, &x[jx], &a[kplus1 + j * a_dim1]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           z__1.r = x[i__3].r - z__2.r, z__1.i = x[i__3].i - 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                           ix -= *incx;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       l = 1 - j;
-                       if (nounit) {
-                           i__2 = j;
-                           z_div(&z__1, &x[j], &a[j * a_dim1 + 1]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   kx += *incx;
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       ix = kx;
-                       l = 1 - j;
-                       if (nounit) {
-                           i__2 = jx;
-                           z_div(&z__1, &x[jx], &a[j * a_dim1 + 1]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ix += *incx;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A') )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   l = kplus1 - j;
-                   if (noconj) {
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           i__2 = l + i__ + j * a_dim1;
-                           i__3 = i__;
-                           z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, z__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[kplus1 + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = i__;
-                           z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, 
-                                   z__2.i = z__3.r * x[i__4].i + z__3.i * x[
-                                   i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__3 = j;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__3 = jx;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (noconj) {
-/* Computing MAX */
-                       i__3 = 1, i__4 = j - *k;
-                       i__2 = j - 1;
-                       for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                           i__3 = l + i__ + j * a_dim1;
-                           i__4 = ix;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[kplus1 + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__2 = ix;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__4 = jx;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   l = 1 - j;
-                   if (noconj) {
-/* Computing MIN */
-                       i__1 = *n, i__4 = j + *k;
-                       i__2 = j + 1;
-                       for (i__ = min(i__1,i__4); i__ >= i__2; --i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__4 = i__;
-                           z__2.r = a[i__1].r * x[i__4].r - a[i__1].i * x[
-                                   i__4].i, z__2.i = a[i__1].r * x[i__4].i + 
-                                   a[i__1].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j * a_dim1 + 1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-/* Computing MIN */
-                       i__2 = *n, i__1 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__2,i__1); i__ >= i__4; --i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__2 = i__;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__4 = j;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__4 = jx;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   ix = kx;
-                   l = 1 - j;
-                   if (noconj) {
-/* Computing MIN */
-                       i__4 = *n, i__2 = j + *k;
-                       i__1 = j + 1;
-                       for (i__ = min(i__4,i__2); i__ >= i__1; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__2 = ix;
-                           z__2.r = a[i__4].r * x[i__2].r - a[i__4].i * x[
-                                   i__2].i, z__2.i = a[i__4].r * x[i__2].i + 
-                                   a[i__4].i * x[i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j * a_dim1 + 1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-/* Computing MIN */
-                       i__1 = *n, i__4 = j + *k;
-                       i__2 = j + 1;
-                       for (i__ = min(i__1,i__4); i__ >= i__2; --i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = ix;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of ZTBSV . */
-} /* ztbsv_ */
-/* Subroutine */ int ztpmv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublecomplex *ap, doublecomplex *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZTPMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix, supplied in packed form. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("ZTPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x:= A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       k = kk;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = k;
-                           z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, z__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ++k;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__2 = j;
-                           i__3 = j;
-                           i__4 = kk + j - 1;
-                           z__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[
-                                   i__4].i, z__1.i = x[i__3].r * ap[i__4].i 
-                                   + x[i__3].i * ap[i__4].r;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                   }
-                   kk += j;
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = kx;
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = k;
-                           z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, z__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__2 = jx;
-                           i__3 = jx;
-                           i__4 = kk + j - 1;
-                           z__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[
-                                   i__4].i, z__1.i = x[i__3].r * ap[i__4].i 
-                                   + x[i__3].i * ap[i__4].r;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                   }
-                   jx += *incx;
-                   kk += j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       k = kk;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = k;
-                           z__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, z__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                           --k;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__1 = j;
-                           i__2 = j;
-                           i__3 = kk - *n + j;
-                           z__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[
-                                   i__3].i, z__1.i = x[i__2].r * ap[i__3].i 
-                                   + x[i__2].i * ap[i__3].r;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                   }
-                   kk -= *n - j + 1;
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = kx;
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = k;
-                           z__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, z__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__1 = jx;
-                           i__2 = jx;
-                           i__3 = kk - *n + j;
-                           z__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[
-                                   i__3].i, z__1.i = x[i__2].r * ap[i__3].i 
-                                   + x[i__2].i * ap[i__3].r;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   k = kk - 1;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = kk;
-                           z__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1]
-                                   .i, z__1.i = temp.r * ap[i__1].i + temp.i 
-                                   * ap[i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = k;
-                           i__2 = i__;
-                           z__2.r = ap[i__1].r * x[i__2].r - ap[i__1].i * x[
-                                   i__2].i, z__2.i = ap[i__1].r * x[i__2].i 
-                                   + ap[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           --k;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__1 = i__;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           --k;
-/* L100: */
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   kk -= j;
-/* L110: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = kk;
-                           z__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1]
-                                   .i, z__1.i = temp.r * ap[i__1].i + temp.i 
-                                   * ap[i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           i__2 = k;
-                           i__3 = ix;
-                           z__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, z__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           d_cnjg(&z__3, &ap[k]);
-                           i__2 = ix;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L130: */
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-                   kk -= j;
-/* L140: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   k = kk + 1;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = kk;
-                           z__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2]
-                                   .i, z__1.i = temp.r * ap[i__2].i + temp.i 
-                                   * ap[i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = k;
-                           i__4 = i__;
-                           z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, z__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ++k;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__3 = i__;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ++k;
-/* L160: */
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   kk += *n - j + 1;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = kk;
-                           z__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2]
-                                   .i, z__1.i = temp.r * ap[i__2].i + temp.i 
-                                   * ap[i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           i__3 = k;
-                           i__4 = ix;
-                           z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, z__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           d_cnjg(&z__3, &ap[k]);
-                           i__3 = ix;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of ZTPMV . */
-} /* ztpmv_ */
-/* Subroutine */ int ztpsv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublecomplex *ap, doublecomplex *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
-           doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZTPSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix, supplied in packed form. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    --x;
-    --ap;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("ZTPSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       if (nounit) {
-                           i__1 = j;
-                           z_div(&z__1, &x[j], &ap[kk]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       k = kk - 1;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__;
-                           i__2 = i__;
-                           i__3 = k;
-                           z__2.r = temp.r * ap[i__3].r - temp.i * ap[i__3]
-                                   .i, z__2.i = temp.r * ap[i__3].i + temp.i 
-                                   * ap[i__3].r;
-                           z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - 
-                                   z__2.i;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                           --k;
-/* L10: */
-                       }
-                   }
-                   kk -= j;
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       if (nounit) {
-                           i__1 = jx;
-                           z_div(&z__1, &x[jx], &ap[kk]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = jx;
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = k;
-                           z__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, z__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           z__1.r = x[i__3].r - z__2.r, z__1.i = x[i__3].i - 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       if (nounit) {
-                           i__2 = j;
-                           z_div(&z__1, &x[j], &ap[kk]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       k = kk + 1;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = k;
-                           z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, z__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ++k;
-/* L50: */
-                       }
-                   }
-                   kk += *n - j + 1;
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       if (nounit) {
-                           i__2 = jx;
-                           z_div(&z__1, &x[jx], &ap[kk]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = jx;
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = k;
-                           z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, z__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   k = kk;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = k;
-                           i__4 = i__;
-                           z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, z__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ++k;
-/* L90: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &ap[kk + j - 1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__3 = i__;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ++k;
-/* L100: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk + j - 1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   kk += j;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = kx;
-                   if (noconj) {
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           i__3 = k;
-                           i__4 = ix;
-                           z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, z__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &ap[kk + j - 1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__3 = ix;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk + j - 1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-                   kk += j;
-/* L140: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   k = kk;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = k;
-                           i__3 = i__;
-                           z__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, z__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           --k;
-/* L150: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &ap[kk - *n + j]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__2 = i__;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           --k;
-/* L160: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk - *n + j]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   kk -= *n - j + 1;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = kx;
-                   if (noconj) {
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           i__2 = k;
-                           i__3 = ix;
-                           z__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, z__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &ap[kk - *n + j]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__2 = ix;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk - *n + j]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of ZTPSV . */
-} /* ztpsv_ */
-/* Subroutine */ int ztrmm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, 
-       integer *lda, doublecomplex *b, integer *ldb, ftnlen side_len, ftnlen 
-       uplo_len, ftnlen transa_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublecomplex temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZTRMM  performs one of the matrix-matrix operations */
-/*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ) */
-/*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-/*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ). */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE specifies whether  op( A ) multiplies B from */
-/*           the left or right as follows: */
-/*              SIDE = 'L' or 'l'   B := alpha*op( A )*B. */
-/*              SIDE = 'R' or 'r'   B := alpha*B*op( A ). */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-/*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ). */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain the matrix  B,  and  on exit  is overwritten  by the */
-/*           transformed matrix. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    noconj = lsame_(transa, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("ZTRMM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-/*     And when */
-    if (alpha->r == 0. && alpha->i == 0.) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               i__3 = i__ + j * b_dim1;
-               b[i__3].r = 0., b[i__3].i = 0.;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*A*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * b_dim1;
-                       if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                           i__3 = k + j * b_dim1;
-                           z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, z__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           i__3 = k - 1;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * a_dim1;
-                               z__2.r = temp.r * a[i__6].r - temp.i * a[i__6]
-                                       .i, z__2.i = temp.r * a[i__6].i + 
-                                       temp.i * a[i__6].r;
-                               z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5]
-                                       .i + z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L30: */
-                           }
-                           if (nounit) {
-                               i__3 = k + k * a_dim1;
-                               z__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
-                                       .i, z__1.i = temp.r * a[i__3].i + 
-                                       temp.i * a[i__3].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = k + j * b_dim1;
-                           b[i__3].r = temp.r, b[i__3].i = temp.i;
-                       }
-/* L40: */
-                   }
-/* L50: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (k = *m; k >= 1; --k) {
-                       i__2 = k + j * b_dim1;
-                       if (b[i__2].r != 0. || b[i__2].i != 0.) {
-                           i__2 = k + j * b_dim1;
-                           z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2]
-                                   .i, z__1.i = alpha->r * b[i__2].i + 
-                                   alpha->i * b[i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           i__2 = k + j * b_dim1;
-                           b[i__2].r = temp.r, b[i__2].i = temp.i;
-                           if (nounit) {
-                               i__2 = k + j * b_dim1;
-                               i__3 = k + j * b_dim1;
-                               i__4 = k + k * a_dim1;
-                               z__1.r = b[i__3].r * a[i__4].r - b[i__3].i * 
-                                       a[i__4].i, z__1.i = b[i__3].r * a[
-                                       i__4].i + b[i__3].i * a[i__4].r;
-                               b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = k + 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * a_dim1;
-                               z__2.r = temp.r * a[i__5].r - temp.i * a[i__5]
-                                       .i, z__2.i = temp.r * a[i__5].i + 
-                                       temp.i * a[i__5].r;
-                               z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4]
-                                       .i + z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L60: */
-                           }
-                       }
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       i__2 = i__ + j * b_dim1;
-                       temp.r = b[i__2].r, temp.i = b[i__2].i;
-                       if (noconj) {
-                           if (nounit) {
-                               i__2 = i__ + i__ * a_dim1;
-                               z__1.r = temp.r * a[i__2].r - temp.i * a[i__2]
-                                       .i, z__1.i = temp.r * a[i__2].i + 
-                                       temp.i * a[i__2].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__2 = i__ - 1;
-                           for (k = 1; k <= i__2; ++k) {
-                               i__3 = k + i__ * a_dim1;
-                               i__4 = k + j * b_dim1;
-                               z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * 
-                                       b[i__4].i, z__2.i = a[i__3].r * b[
-                                       i__4].i + a[i__3].i * b[i__4].r;
-                               z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                           }
-                       } else {
-                           if (nounit) {
-                               d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
-                               z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                       z__1.i = temp.r * z__2.i + temp.i * 
-                                       z__2.r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__2 = i__ - 1;
-                           for (k = 1; k <= i__2; ++k) {
-                               d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                               i__3 = k + j * b_dim1;
-                               z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3]
-                                       .i, z__2.i = z__3.r * b[i__3].i + 
-                                       z__3.i * b[i__3].r;
-                               z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                           }
-                       }
-                       i__2 = i__ + j * b_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L110: */
-                   }
-/* L120: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       temp.r = b[i__3].r, temp.i = b[i__3].i;
-                       if (noconj) {
-                           if (nounit) {
-                               i__3 = i__ + i__ * a_dim1;
-                               z__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
-                                       .i, z__1.i = temp.r * a[i__3].i + 
-                                       temp.i * a[i__3].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (k = i__ + 1; k <= i__3; ++k) {
-                               i__4 = k + i__ * a_dim1;
-                               i__5 = k + j * b_dim1;
-                               z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * 
-                                       b[i__5].i, z__2.i = a[i__4].r * b[
-                                       i__5].i + a[i__4].i * b[i__5].r;
-                               z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L130: */
-                           }
-                       } else {
-                           if (nounit) {
-                               d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
-                               z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                       z__1.i = temp.r * z__2.i + temp.i * 
-                                       z__2.r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (k = i__ + 1; k <= i__3; ++k) {
-                               d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                               i__4 = k + j * b_dim1;
-                               z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4]
-                                       .i, z__2.i = z__3.r * b[i__4].i + 
-                                       z__3.i * b[i__4].r;
-                               z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L140: */
-                           }
-                       }
-                       i__3 = i__ + j * b_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L150: */
-                   }
-/* L160: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*B*A. */
-           if (upper) {
-               for (j = *n; j >= 1; --j) {
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       i__1 = j + j * a_dim1;
-                       z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                               z__1.i = temp.r * a[i__1].i + temp.i * a[i__1]
-                               .r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                   }
-                   i__1 = *m;
-                   for (i__ = 1; i__ <= i__1; ++i__) {
-                       i__2 = i__ + j * b_dim1;
-                       i__3 = i__ + j * b_dim1;
-                       z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                               z__1.i = temp.r * b[i__3].i + temp.i * b[i__3]
-                               .r;
-                       b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L170: */
-                   }
-                   i__1 = j - 1;
-                   for (k = 1; k <= i__1; ++k) {
-                       i__2 = k + j * a_dim1;
-                       if (a[i__2].r != 0. || a[i__2].i != 0.) {
-                           i__2 = k + j * a_dim1;
-                           z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2]
-                                   .i, z__1.i = alpha->r * a[i__2].i + 
-                                   alpha->i * a[i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, z__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4]
-                                       .i + z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L180: */
-                           }
-                       }
-/* L190: */
-                   }
-/* L200: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       i__2 = j + j * a_dim1;
-                       z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                               z__1.i = temp.r * a[i__2].i + temp.i * a[i__2]
-                               .r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                   }
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       i__4 = i__ + j * b_dim1;
-                       z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                               z__1.i = temp.r * b[i__4].i + temp.i * b[i__4]
-                               .r;
-                       b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L210: */
-                   }
-                   i__2 = *n;
-                   for (k = j + 1; k <= i__2; ++k) {
-                       i__3 = k + j * a_dim1;
-                       if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                           i__3 = k + j * a_dim1;
-                           z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3]
-                                   .i, z__1.i = alpha->r * a[i__3].i + 
-                                   alpha->i * a[i__3].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, z__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5]
-                                       .i + z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L220: */
-                           }
-                       }
-/* L230: */
-                   }
-/* L240: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ). */
-           if (upper) {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   i__2 = k - 1;
-                   for (j = 1; j <= i__2; ++j) {
-                       i__3 = j + k * a_dim1;
-                       if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                           if (noconj) {
-                               i__3 = j + k * a_dim1;
-                               z__1.r = alpha->r * a[i__3].r - alpha->i * a[
-                                       i__3].i, z__1.i = alpha->r * a[i__3]
-                                       .i + alpha->i * a[i__3].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           } else {
-                               d_cnjg(&z__2, &a[j + k * a_dim1]);
-                               z__1.r = alpha->r * z__2.r - alpha->i * 
-                                       z__2.i, z__1.i = alpha->r * z__2.i + 
-                                       alpha->i * z__2.r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, z__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5]
-                                       .i + z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L250: */
-                           }
-                       }
-/* L260: */
-                   }
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       if (noconj) {
-                           i__2 = k + k * a_dim1;
-                           z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       } else {
-                           d_cnjg(&z__2, &a[k + k * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   if (temp.r != 1. || temp.i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   z__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L270: */
-                       }
-                   }
-/* L280: */
-               }
-           } else {
-               for (k = *n; k >= 1; --k) {
-                   i__1 = *n;
-                   for (j = k + 1; j <= i__1; ++j) {
-                       i__2 = j + k * a_dim1;
-                       if (a[i__2].r != 0. || a[i__2].i != 0.) {
-                           if (noconj) {
-                               i__2 = j + k * a_dim1;
-                               z__1.r = alpha->r * a[i__2].r - alpha->i * a[
-                                       i__2].i, z__1.i = alpha->r * a[i__2]
-                                       .i + alpha->i * a[i__2].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           } else {
-                               d_cnjg(&z__2, &a[j + k * a_dim1]);
-                               z__1.r = alpha->r * z__2.r - alpha->i * 
-                                       z__2.i, z__1.i = alpha->r * z__2.i + 
-                                       alpha->i * z__2.r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, z__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4]
-                                       .i + z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L290: */
-                           }
-                       }
-/* L300: */
-                   }
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       if (noconj) {
-                           i__1 = k + k * a_dim1;
-                           z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   z__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       } else {
-                           d_cnjg(&z__2, &a[k + k * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   if (temp.r != 1. || temp.i != 0.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   z__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L310: */
-                       }
-                   }
-/* L320: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of ZTRMM . */
-} /* ztrmm_ */
-/* Subroutine */ int ztrmv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, 
-       ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZTRMV  performs one of the matrix-vector operations */
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   x := A*x. */
-/*              TRANS = 'T' or 't'   x := A'*x. */
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("ZTRMV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := A*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__2 = j;
-                           i__3 = j;
-                           i__4 = j + j * a_dim1;
-                           z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
-                                   i__4].i, z__1.i = x[i__3].r * a[i__4].i + 
-                                   x[i__3].i * a[i__4].r;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = kx;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__2 = jx;
-                           i__3 = jx;
-                           i__4 = j + j * a_dim1;
-                           z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
-                                   i__4].i, z__1.i = x[i__3].r * a[i__4].i + 
-                                   x[i__3].i * a[i__4].r;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                   }
-                   jx += *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__1 = j;
-                           i__2 = j;
-                           i__3 = j + j * a_dim1;
-                           z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, z__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = kx;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__1 = jx;
-                           i__2 = jx;
-                           i__3 = j + j * a_dim1;
-                           z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, z__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                   }
-                   jx -= *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = j + j * a_dim1;
-                           z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   z__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__ + j * a_dim1;
-                           i__2 = i__;
-                           z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__1 = i__;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = j + j * a_dim1;
-                           z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   z__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           i__1 = i__ + j * a_dim1;
-                           i__2 = ix;
-                           z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__1 = ix;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L130: */
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = j + j * a_dim1;
-                           z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = i__;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__3 = i__;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = j + j * a_dim1;
-                           z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = ix;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__3 = ix;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of ZTRMV . */
-} /* ztrmv_ */
-/* Subroutine */ int ztrsm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, 
-       integer *lda, doublecomplex *b, integer *ldb, ftnlen side_len, ftnlen 
-       uplo_len, ftnlen transa_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6, i__7;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
-           doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublecomplex temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZTRSM  solves one of the matrix equations */
-/*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B, */
-/*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-/*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ). */
-/*  The matrix X is overwritten on B. */
-/*  Parameters */
-/*  ========== */
-/*  SIDE   - CHARACTER*1. */
-/*           On entry, SIDE specifies whether op( A ) appears on the left */
-/*           or right of X as follows: */
-/*              SIDE = 'L' or 'l'   op( A )*X = alpha*B. */
-/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B. */
-/*           Unchanged on exit. */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-/*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ). */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain  the  right-hand  side  matrix  B,  and  on exit  is */
-/*           overwritten by the solution matrix  X. */
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-/*  Level 3 Blas routine. */
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    noconj = lsame_(transa, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("ZTRSM ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-/*     And when  alpha.eq.zero. */
-    if (alpha->r == 0. && alpha->i == 0.) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               i__3 = i__ + j * b_dim1;
-               b[i__3].r = 0., b[i__3].i = 0.;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-/*     Start the operations. */
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*inv( A )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, z__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L30: */
-                       }
-                   }
-                   for (k = *m; k >= 1; --k) {
-                       i__2 = k + j * b_dim1;
-                       if (b[i__2].r != 0. || b[i__2].i != 0.) {
-                           if (nounit) {
-                               i__2 = k + j * b_dim1;
-                               z_div(&z__1, &b[k + j * b_dim1], &a[k + k * 
-                                       a_dim1]);
-                               b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-                           }
-                           i__2 = k - 1;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = k + j * b_dim1;
-                               i__6 = i__ + k * a_dim1;
-                               z__2.r = b[i__5].r * a[i__6].r - b[i__5].i * 
-                                       a[i__6].i, z__2.i = b[i__5].r * a[
-                                       i__6].i + b[i__5].i * a[i__6].r;
-                               z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4]
-                                       .i - z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L40: */
-                           }
-                       }
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, z__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L70: */
-                       }
-                   }
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * b_dim1;
-                       if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                           if (nounit) {
-                               i__3 = k + j * b_dim1;
-                               z_div(&z__1, &b[k + j * b_dim1], &a[k + k * 
-                                       a_dim1]);
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = k + 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = k + j * b_dim1;
-                               i__7 = i__ + k * a_dim1;
-                               z__2.r = b[i__6].r * a[i__7].r - b[i__6].i * 
-                                       a[i__7].i, z__2.i = b[i__6].r * a[
-                                       i__7].i + b[i__6].i * a[i__7].r;
-                               z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5]
-                                       .i - z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L80: */
-                           }
-                       }
-/* L90: */
-                   }
-/* L100: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*inv( A' )*B */
-/*           or    B := alpha*inv( conjg( A' ) )*B. */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       if (noconj) {
-                           i__3 = i__ - 1;
-                           for (k = 1; k <= i__3; ++k) {
-                               i__4 = k + i__ * a_dim1;
-                               i__5 = k + j * b_dim1;
-                               z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * 
-                                       b[i__5].i, z__2.i = a[i__4].r * b[
-                                       i__5].i + a[i__4].i * b[i__5].r;
-                               z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L110: */
-                           }
-                           if (nounit) {
-                               z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                       } else {
-                           i__3 = i__ - 1;
-                           for (k = 1; k <= i__3; ++k) {
-                               d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                               i__4 = k + j * b_dim1;
-                               z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4]
-                                       .i, z__2.i = z__3.r * b[i__4].i + 
-                                       z__3.i * b[i__4].r;
-                               z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L120: */
-                           }
-                           if (nounit) {
-                               d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
-                               z_div(&z__1, &temp, &z__2);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                       }
-                       i__3 = i__ + j * b_dim1;
-                       b[i__3].r = temp.r, b[i__3].i = temp.i;
-/* L130: */
-                   }
-/* L140: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       i__2 = i__ + j * b_dim1;
-                       z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                               z__1.i = alpha->r * b[i__2].i + alpha->i * b[
-                               i__2].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       if (noconj) {
-                           i__2 = *m;
-                           for (k = i__ + 1; k <= i__2; ++k) {
-                               i__3 = k + i__ * a_dim1;
-                               i__4 = k + j * b_dim1;
-                               z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * 
-                                       b[i__4].i, z__2.i = a[i__3].r * b[
-                                       i__4].i + a[i__3].i * b[i__4].r;
-                               z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                           }
-                           if (nounit) {
-                               z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                       } else {
-                           i__2 = *m;
-                           for (k = i__ + 1; k <= i__2; ++k) {
-                               d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                               i__3 = k + j * b_dim1;
-                               z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3]
-                                       .i, z__2.i = z__3.r * b[i__3].i + 
-                                       z__3.i * b[i__3].r;
-                               z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                           }
-                           if (nounit) {
-                               d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
-                               z_div(&z__1, &temp, &z__2);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                       }
-                       i__2 = i__ + j * b_dim1;
-                       b[i__2].r = temp.r, b[i__2].i = temp.i;
-/* L170: */
-                   }
-/* L180: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-/*           Form  B := alpha*B*inv( A ). */
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, z__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = j - 1;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * a_dim1;
-                       if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = k + j * a_dim1;
-                               i__7 = i__ + k * b_dim1;
-                               z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * 
-                                       b[i__7].i, z__2.i = a[i__6].r * b[
-                                       i__7].i + a[i__6].i * b[i__7].r;
-                               z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5]
-                                       .i - z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L200: */
-                           }
-                       }
-/* L210: */
-                   }
-                   if (nounit) {
-                       z_div(&z__1, &c_b2094, &a[j + j * a_dim1]);
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   z__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L220: */
-                       }
-                   }
-/* L230: */
-               }
-           } else {
-               for (j = *n; j >= 1; --j) {
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + j * b_dim1;
-                           i__3 = i__ + j * b_dim1;
-                           z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, z__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L240: */
-                       }
-                   }
-                   i__1 = *n;
-                   for (k = j + 1; k <= i__1; ++k) {
-                       i__2 = k + j * a_dim1;
-                       if (a[i__2].r != 0. || a[i__2].i != 0.) {
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = k + j * a_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               z__2.r = a[i__5].r * b[i__6].r - a[i__5].i * 
-                                       b[i__6].i, z__2.i = a[i__5].r * b[
-                                       i__6].i + a[i__5].i * b[i__6].r;
-                               z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4]
-                                       .i - z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L250: */
-                           }
-                       }
-/* L260: */
-                   }
-                   if (nounit) {
-                       z_div(&z__1, &c_b2094, &a[j + j * a_dim1]);
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + j * b_dim1;
-                           i__3 = i__ + j * b_dim1;
-                           z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   z__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L270: */
-                       }
-                   }
-/* L280: */
-               }
-           }
-       } else {
-/*           Form  B := alpha*B*inv( A' ) */
-/*           or    B := alpha*B*inv( conjg( A' ) ). */
-           if (upper) {
-               for (k = *n; k >= 1; --k) {
-                   if (nounit) {
-                       if (noconj) {
-                           z_div(&z__1, &c_b2094, &a[k + k * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       } else {
-                           d_cnjg(&z__2, &a[k + k * a_dim1]);
-                           z_div(&z__1, &c_b2094, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   z__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L290: */
-                       }
-                   }
-                   i__1 = k - 1;
-                   for (j = 1; j <= i__1; ++j) {
-                       i__2 = j + k * a_dim1;
-                       if (a[i__2].r != 0. || a[i__2].i != 0.) {
-                           if (noconj) {
-                               i__2 = j + k * a_dim1;
-                               temp.r = a[i__2].r, temp.i = a[i__2].i;
-                           } else {
-                               d_cnjg(&z__1, &a[j + k * a_dim1]);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, z__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4]
-                                       .i - z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L300: */
-                           }
-                       }
-/* L310: */
-                   }
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, z__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L320: */
-                       }
-                   }
-/* L330: */
-               }
-           } else {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   if (nounit) {
-                       if (noconj) {
-                           z_div(&z__1, &c_b2094, &a[k + k * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       } else {
-                           d_cnjg(&z__2, &a[k + k * a_dim1]);
-                           z_div(&z__1, &c_b2094, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   z__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L340: */
-                       }
-                   }
-                   i__2 = *n;
-                   for (j = k + 1; j <= i__2; ++j) {
-                       i__3 = j + k * a_dim1;
-                       if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                           if (noconj) {
-                               i__3 = j + k * a_dim1;
-                               temp.r = a[i__3].r, temp.i = a[i__3].i;
-                           } else {
-                               d_cnjg(&z__1, &a[j + k * a_dim1]);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, z__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5]
-                                       .i - z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L350: */
-                           }
-                       }
-/* L360: */
-                   }
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, z__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L370: */
-                       }
-                   }
-/* L380: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of ZTRSM . */
-} /* ztrsm_ */
-/* Subroutine */ int ztrsv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, 
-       ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-    /* Builtin functions */
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
-           doublecomplex *, doublecomplex *);
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-/*  Purpose */
-/*  ======= */
-/*  ZTRSV  solves one of the systems of equations */
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix. */
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-/*  Parameters */
-/*  ========== */
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-/*           Unchanged on exit. */
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-/*              TRANS = 'N' or 'n'   A*x = b. */
-/*              TRANS = 'T' or 't'   A'*x = b. */
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-/*           Unchanged on exit. */
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-/*           Unchanged on exit. */
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-/*  Level 2 Blas routine. */
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-/*     Test the input parameters. */
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("ZTRSV ", &info, (ftnlen)6);
-       return 0;
-    }
-/*     Quick return if possible. */
-    if (*n == 0) {
-       return 0;
-    }
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-/*        Form  x := inv( A )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       if (nounit) {
-                           i__1 = j;
-                           z_div(&z__1, &x[j], &a[j + j * a_dim1]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__;
-                           i__2 = i__;
-                           i__3 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   z__2.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - 
-                                   z__2.i;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       if (nounit) {
-                           i__1 = jx;
-                           z_div(&z__1, &x[jx], &a[j + j * a_dim1]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = jx;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           i__1 = ix;
-                           i__2 = ix;
-                           i__3 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   z__2.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - 
-                                   z__2.i;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       if (nounit) {
-                           i__2 = j;
-                           z_div(&z__1, &x[j], &a[j + j * a_dim1]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       if (nounit) {
-                           i__2 = jx;
-                           z_div(&z__1, &x[jx], &a[j + j * a_dim1]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = jx;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = i__;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__3 = i__;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   ix = kx;
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = ix;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__3 = ix;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__ + j * a_dim1;
-                           i__3 = i__;
-                           z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, z__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__2 = i__;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   ix = kx;
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__ + j * a_dim1;
-                           i__3 = ix;
-                           z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, z__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__2 = ix;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-    return 0;
-/*     End of ZTRSV . */
-} /* ztrsv_ */
diff --git a/superlu/BLAS/License.txt b/superlu/BLAS/License.txt
deleted file mode 100644
index b7ca013b..00000000
--- a/superlu/BLAS/License.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-  The reference BLAS is a freely-available software package. It is available from netlib via anonymous ftp
-  and the World Wide Web. Thus, it can be included in commercial software packages (and has been). We only
-  ask that proper credit be given to the authors.
-  Like all software, it is copyrighted. It is not trademarked, but we do ask the following:
-  If you modify the source for these routines we ask that you change the name of the routine and comment
-  the changes made to the original.
-  We will gladly answer any questions regarding the software. If a modification is done, however, it is the
-  responsibility of the person who modified the routine to provide support.
-  see
diff --git a/superlu/BLAS/caxpy.f b/superlu/BLAS/caxpy.f
deleted file mode 100644
index 7ee77747..00000000
--- a/superlu/BLAS/caxpy.f
+++ /dev/null
@@ -1,102 +0,0 @@
-*> \brief \b CAXPY
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX CA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    CAXPY constant times a vector plus a vector.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-      REAL SCABS1
-*     ..
-      IF (N.LE.0) RETURN
-      IF (SCABS1(CA).EQ.0.0E+0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-         DO I = 1,N
-            CY(I) = CY(I) + CA*CX(I)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CY(IY) = CY(IY) + CA*CX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/ccopy.f b/superlu/BLAS/ccopy.f
deleted file mode 100644
index eeb5f299..00000000
--- a/superlu/BLAS/ccopy.f
+++ /dev/null
@@ -1,94 +0,0 @@
-*> \brief \b CCOPY
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    CCOPY copies a vector x to a vector y.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-         DO I = 1,N
-            CY(I) = CX(I)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CY(IY) = CX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/cdotc.f b/superlu/BLAS/cdotc.f
deleted file mode 100644
index cd341698..00000000
--- a/superlu/BLAS/cdotc.f
+++ /dev/null
@@ -1,103 +0,0 @@
-*> \brief \b CDOTC
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CDOTC forms the dot product of two complex vectors
-*>      CDOTC = X^H * Y
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack,  3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      CTEMP = (0.0,0.0)
-      CDOTC = (0.0,0.0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-         DO I = 1,N
-            CTEMP = CTEMP + CONJG(CX(I))*CY(I)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CTEMP = CTEMP + CONJG(CX(IX))*CY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      CDOTC = CTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/cdotu.f b/superlu/BLAS/cdotu.f
deleted file mode 100644
index 1e127bc0..00000000
--- a/superlu/BLAS/cdotu.f
+++ /dev/null
@@ -1,100 +0,0 @@
-*> \brief \b CDOTU
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CDOTU forms the dot product of two complex vectors
-*>      CDOTU = X^T * Y
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      CTEMP = (0.0,0.0)
-      CDOTU = (0.0,0.0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-         DO I = 1,N
-            CTEMP = CTEMP + CX(I)*CY(I)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CTEMP = CTEMP + CX(IX)*CY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      CDOTU = CTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/cgbmv.f b/superlu/BLAS/cgbmv.f
deleted file mode 100644
index de12852a..00000000
--- a/superlu/BLAS/cgbmv.f
+++ /dev/null
@@ -1,390 +0,0 @@
-*> \brief \b CGBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CGBMV  performs one of the matrix-vector operations
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,   or
-*>    y := alpha*A**H*x + beta*y,
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n band matrix, with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>              TRANS = 'C' or 'c'   y := alpha*A**H*x + beta*y.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>           On entry, KL specifies the number of sub-diagonals of the
-*>           matrix A. KL must satisfy  0 .le. KL.
-*> \endverbatim
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>           On entry, KU specifies the number of super-diagonals of the
-*>           matrix A. KU must satisfy  0 .le. KU.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading ( kl + ku + 1 ) by n part of the
-*>           array A must contain the matrix of coefficients, supplied
-*>           column by column, with the leading diagonal of the matrix in
-*>           row ( ku + 1 ) of the array, the first super-diagonal
-*>           starting at position 2 in row ku, the first sub-diagonal
-*>           starting at position 1 in row ( ku + 2 ), and so on.
-*>           Elements in the array A that do not correspond to elements
-*>           in the band matrix (such as the top left ku by ku triangle)
-*>           are not referenced.
-*>           The following program segment will transfer a band matrix
-*>           from conventional full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    K = KU + 1 - J
-*>                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
-*>                       A( K + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( kl + ku + 1 ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (KL.LT.0) THEN
-          INFO = 4
-      ELSE IF (KU.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (KL+KU+1)) THEN
-          INFO = 8
-          INFO = 10
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the band part of A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      KUP1 = KU + 1
-*        Form  y := alpha*A*x + y.
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  K = KUP1 - J
-                  DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(I) = Y(I) + TEMP*A(K+I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  K = KUP1 - J
-                  DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(IY) = Y(IY) + TEMP*A(K+I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-                  IF (J.GT.KU) KY = KY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y := alpha*A**T*x + y  or  y := alpha*A**H*x + y.
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 110 J = 1,N
-                  TEMP = ZERO
-                  K = KUP1 - J
-                  IF (NOCONJ) THEN
-                      DO 90 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + A(K+I,J)*X(I)
-   90                 CONTINUE
-                  ELSE
-                      DO 100 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + CONJG(A(K+I,J))*X(I)
-  100                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  110         CONTINUE
-          ELSE
-              DO 140 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  K = KUP1 - J
-                  IF (NOCONJ) THEN
-                      DO 120 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + A(K+I,J)*X(IX)
-                          IX = IX + INCX
-  120                 CONTINUE
-                  ELSE
-                      DO 130 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + CONJG(A(K+I,J))*X(IX)
-                          IX = IX + INCX
-  130                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-                  IF (J.GT.KU) KX = KX + INCX
-  140         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CGBMV .
-      END
diff --git a/superlu/BLAS/cgemm.f b/superlu/BLAS/cgemm.f
deleted file mode 100644
index 018ffad6..00000000
--- a/superlu/BLAS/cgemm.f
+++ /dev/null
@@ -1,483 +0,0 @@
-*> \brief \b CGEMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CGEMM  performs one of the matrix-matrix operations
-*>    C := alpha*op( A )*op( B ) + beta*C,
-*> where  op( X ) is one of
-*>    op( X ) = X   or   op( X ) = X**T   or   op( X ) = X**H,
-*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
-*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n',  op( A ) = A.
-*>              TRANSA = 'T' or 't',  op( A ) = A**T.
-*>              TRANSA = 'C' or 'c',  op( A ) = A**H.
-*> \endverbatim
-*> \param[in] TRANSB
-*> \verbatim
-*>          TRANSB is CHARACTER*1
-*>           On entry, TRANSB specifies the form of op( B ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSB = 'N' or 'n',  op( B ) = B.
-*>              TRANSB = 'T' or 't',  op( B ) = B**T.
-*>              TRANSB = 'C' or 'c',  op( B ) = B**H.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies  the number  of rows  of the  matrix
-*>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N  specifies the number  of columns of the matrix
-*>           op( B ) and the number of columns of the matrix C. N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry,  K  specifies  the number of columns of the matrix
-*>           op( A ) and the number of rows of the matrix op( B ). K must
-*>           be at least  zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-*>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by m  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, kb ), where kb is
-*>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-*>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  n by k  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
-*>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n  matrix
-*>           ( alpha*op( A )*op( B ) + beta*C ).
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
-*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and
-*     B  respectively are to be  transposed but  not conjugated  and set
-*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
-*     and the number of rows of  B  respectively.
-      IF (NOTA) THEN
-          NROWA = M
-          NCOLA = K
-      ELSE
-          NROWA = K
-          NCOLA = M
-      END IF
-      IF (NOTB) THEN
-          NROWB = K
-      ELSE
-          NROWB = N
-      END IF
-*     Test the input parameters.
-      INFO = 0
-      IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND.
-     +    (.NOT.LSAME(TRANSA,'T'))) THEN
-          INFO = 1
-     +         (.NOT.LSAME(TRANSB,'T'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-          INFO = 8
-          INFO = 10
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (NOTB) THEN
-          IF (NOTA) THEN
-*           Form  C := alpha*A*B + beta*C.
-              DO 90 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 50 I = 1,M
-                          C(I,J) = ZERO
-   50                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 60 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-   60                 CONTINUE
-                  END IF
-                  DO 80 L = 1,K
-                      TEMP = ALPHA*B(L,J)
-                      DO 70 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                 CONTINUE
-   80             CONTINUE
-   90         CONTINUE
-          ELSE IF (CONJA) THEN
-*           Form  C := alpha*A**H*B + beta*C.
-              DO 120 J = 1,N
-                  DO 110 I = 1,M
-                      TEMP = ZERO
-                      DO 100 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*B(L,J)
-  100                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  110             CONTINUE
-  120         CONTINUE
-          ELSE
-*           Form  C := alpha*A**T*B + beta*C
-              DO 150 J = 1,N
-                  DO 140 I = 1,M
-                      TEMP = ZERO
-                      DO 130 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(L,J)
-  130                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  140             CONTINUE
-  150         CONTINUE
-          END IF
-          IF (CONJB) THEN
-*           Form  C := alpha*A*B**H + beta*C.
-              DO 200 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 160 I = 1,M
-                          C(I,J) = ZERO
-  160                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 170 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  170                 CONTINUE
-                  END IF
-                  DO 190 L = 1,K
-                      TEMP = ALPHA*CONJG(B(J,L))
-                      DO 180 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  180                 CONTINUE
-  190             CONTINUE
-  200         CONTINUE
-          ELSE
-*           Form  C := alpha*A*B**T + beta*C
-              DO 250 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 210 I = 1,M
-                          C(I,J) = ZERO
-  210                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 220 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  220                 CONTINUE
-                  END IF
-                  DO 240 L = 1,K
-                      TEMP = ALPHA*B(J,L)
-                      DO 230 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  230                 CONTINUE
-  240             CONTINUE
-  250         CONTINUE
-          END IF
-          IF (CONJB) THEN
-*           Form  C := alpha*A**H*B**H + beta*C.
-              DO 280 J = 1,N
-                  DO 270 I = 1,M
-                      TEMP = ZERO
-                      DO 260 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*CONJG(B(J,L))
-  260                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  270             CONTINUE
-  280         CONTINUE
-          ELSE
-*           Form  C := alpha*A**H*B**T + beta*C
-              DO 310 J = 1,N
-                  DO 300 I = 1,M
-                      TEMP = ZERO
-                      DO 290 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*B(J,L)
-  290                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  300             CONTINUE
-  310         CONTINUE
-          END IF
-      ELSE
-          IF (CONJB) THEN
-*           Form  C := alpha*A**T*B**H + beta*C
-              DO 340 J = 1,N
-                  DO 330 I = 1,M
-                      TEMP = ZERO
-                      DO 320 L = 1,K
-                          TEMP = TEMP + A(L,I)*CONJG(B(J,L))
-  320                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  330             CONTINUE
-  340         CONTINUE
-          ELSE
-*           Form  C := alpha*A**T*B**T + beta*C
-              DO 370 J = 1,N
-                  DO 360 I = 1,M
-                      TEMP = ZERO
-                      DO 350 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(J,L)
-  350                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  360             CONTINUE
-  370         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CGEMM .
-      END
diff --git a/superlu/BLAS/cgemv.f b/superlu/BLAS/cgemv.f
deleted file mode 100644
index aeb94090..00000000
--- a/superlu/BLAS/cgemv.f
+++ /dev/null
@@ -1,350 +0,0 @@
-*> \brief \b CGEMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CGEMV performs one of the matrix-vector operations
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,   or
-*>    y := alpha*A**H*x + beta*y,
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>              TRANS = 'C' or 'c'   y := alpha*A**H*x + beta*y.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry with BETA non-zero, the incremented array Y
-*>           must contain the vector y. On exit, Y is overwritten by the
-*>           updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-          INFO = 6
-          INFO = 8
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-*        Form  y := alpha*A*x + y.
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  DO 50 I = 1,M
-                      Y(I) = Y(I) + TEMP*A(I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  DO 70 I = 1,M
-                      Y(IY) = Y(IY) + TEMP*A(I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y := alpha*A**T*x + y  or  y := alpha*A**H*x + y.
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 110 J = 1,N
-                  TEMP = ZERO
-                  IF (NOCONJ) THEN
-                      DO 90 I = 1,M
-                          TEMP = TEMP + A(I,J)*X(I)
-   90                 CONTINUE
-                  ELSE
-                      DO 100 I = 1,M
-                          TEMP = TEMP + CONJG(A(I,J))*X(I)
-  100                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  110         CONTINUE
-          ELSE
-              DO 140 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  IF (NOCONJ) THEN
-                      DO 120 I = 1,M
-                          TEMP = TEMP + A(I,J)*X(IX)
-                          IX = IX + INCX
-  120                 CONTINUE
-                  ELSE
-                      DO 130 I = 1,M
-                          TEMP = TEMP + CONJG(A(I,J))*X(IX)
-                          IX = IX + INCX
-  130                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  140         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CGEMV .
-      END
diff --git a/superlu/BLAS/cgerc.f b/superlu/BLAS/cgerc.f
deleted file mode 100644
index e730edfd..00000000
--- a/superlu/BLAS/cgerc.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b CGERC
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CGERC  performs the rank 1 operation
-*>    A := alpha*x*y**H + A,
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*CONJG(Y(JY))
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*CONJG(Y(JY))
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-      RETURN
-*     End of CGERC .
-      END
diff --git a/superlu/BLAS/cgeru.f b/superlu/BLAS/cgeru.f
deleted file mode 100644
index bc7540fa..00000000
--- a/superlu/BLAS/cgeru.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b CGERU
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CGERU  performs the rank 1 operation
-*>    A := alpha*x*y**T + A,
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-      RETURN
-*     End of CGERU .
-      END
diff --git a/superlu/BLAS/chbmv.f b/superlu/BLAS/chbmv.f
deleted file mode 100644
index 435c8dd2..00000000
--- a/superlu/BLAS/chbmv.f
+++ /dev/null
@@ -1,380 +0,0 @@
-*> \brief \b CHBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHBMV  performs the matrix-vector  operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian band matrix, with k super-diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the band matrix A is being supplied as
-*>           follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  being supplied.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  being supplied.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry, K specifies the number of super-diagonals of the
-*>           matrix A. K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the hermitian matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer the upper
-*>           triangular part of a hermitian band matrix from conventional
-*>           full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the hermitian matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer the lower
-*>           triangular part of a hermitian band matrix from conventional
-*>           full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (K.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 6
-          INFO = 8
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of the array A
-*     are accessed sequentially with one pass through A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when upper triangle of A is stored.
-          KPLUS1 = K + 1
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  L = KPLUS1 - J
-                  DO 50 I = MAX(1,J-K),J - 1
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*REAL(A(KPLUS1,J)) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  L = KPLUS1 - J
-                  DO 70 I = MAX(1,J-K),J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*REAL(A(KPLUS1,J)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  IF (J.GT.K) THEN
-                      KX = KX + INCX
-                      KY = KY + INCY
-                  END IF
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when lower triangle of A is stored.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*REAL(A(1,J))
-                  L = 1 - J
-                  DO 90 I = J + 1,MIN(N,J+K)
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*REAL(A(1,J))
-                  L = 1 - J
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,MIN(N,J+K)
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CHBMV .
-      END
diff --git a/superlu/BLAS/chemm.f b/superlu/BLAS/chemm.f
deleted file mode 100644
index 834b209a..00000000
--- a/superlu/BLAS/chemm.f
+++ /dev/null
@@ -1,371 +0,0 @@
-*> \brief \b CHEMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHEMM  performs one of the matrix-matrix operations
-*>    C := alpha*A*B + beta*C,
-*> or
-*>    C := alpha*B*A + beta*C,
-*> where alpha and beta are scalars, A is an hermitian matrix and  B and
-*> C are m by n matrices.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  hermitian matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  hermitian  matrix   A  is  to  be
-*>           referenced as follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  hermitian matrix is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  hermitian matrix is to be referenced.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  hermitian matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  hermitian matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  hermitian
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  hermitian matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  hermitian matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  hermitian
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Note that the imaginary parts  of the diagonal elements need
-*>           not be set, they are assumed to be zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least max( 1, n ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     Set NROWA as the number of rows of A.
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     And when  alpha.eq.zero.
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSAME(SIDE,'L')) THEN
-*        Form  C := alpha*A*B + beta*C.
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*CONJG(A(K,I))
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*REAL(A(I,I)) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*REAL(A(I,I)) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*CONJG(A(K,I))
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*REAL(A(I,I)) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*REAL(A(I,I)) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*B*A + beta*C.
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*REAL(A(J,J))
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*CONJG(A(J,K))
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*CONJG(A(J,K))
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-      RETURN
-*     End of CHEMM .
-      END
diff --git a/superlu/BLAS/chemv.f b/superlu/BLAS/chemv.f
deleted file mode 100644
index 21509297..00000000
--- a/superlu/BLAS/chemv.f
+++ /dev/null
@@ -1,337 +0,0 @@
-*> \brief \b CHEMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHEMV  performs the matrix-vector  operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when A is stored in upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + CONJG(A(I,J))*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*REAL(A(J,J)) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 I = 1,J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + CONJG(A(I,J))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*REAL(A(J,J)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when A is stored in lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*REAL(A(J,J))
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + CONJG(A(I,J))*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*REAL(A(J,J))
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,N
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + CONJG(A(I,J))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CHEMV .
-      END
diff --git a/superlu/BLAS/cher.f b/superlu/BLAS/cher.f
deleted file mode 100644
index 78a4e0b7..00000000
--- a/superlu/BLAS/cher.f
+++ /dev/null
@@ -1,278 +0,0 @@
-*> \brief \b CHER
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHER   performs the hermitian rank 1 operation
-*>    A := alpha*x*x**H + A,
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n hermitian matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHER  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set the start point in X if the increment is not unity.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when A is stored in upper triangle.
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(J))
-                      DO 10 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   10                 CONTINUE
-                      A(J,J) = REAL(A(J,J)) + REAL(X(J)*TEMP)
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(JX))
-                      IX = KX
-                      DO 30 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                      A(J,J) = REAL(A(J,J)) + REAL(X(JX)*TEMP)
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-                  JX = JX + INCX
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when A is stored in lower triangle.
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(J))
-                      A(J,J) = REAL(A(J,J)) + REAL(TEMP*X(J))
-                      DO 50 I = J + 1,N
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   50                 CONTINUE
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(JX))
-                      A(J,J) = REAL(A(J,J)) + REAL(TEMP*X(JX))
-                      IX = JX
-                      DO 70 I = J + 1,N
-                          IX = IX + INCX
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-   70                 CONTINUE
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CHER  .
-      END
diff --git a/superlu/BLAS/cher2.f b/superlu/BLAS/cher2.f
deleted file mode 100644
index fd65f970..00000000
--- a/superlu/BLAS/cher2.f
+++ /dev/null
@@ -1,317 +0,0 @@
-*> \brief \b CHER2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHER2  performs the hermitian rank 2 operation
-*>    A := alpha*x*y**H + conjg( alpha )*y*x**H + A,
-*> where alpha is a scalar, x and y are n element vectors and A is an n
-*> by n hermitian matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHER2 ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when A is stored in the upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(J))
-                      TEMP2 = CONJG(ALPHA*X(J))
-                      DO 10 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   10                 CONTINUE
-                      A(J,J) = REAL(A(J,J)) +
-     +                         REAL(X(J)*TEMP1+Y(J)*TEMP2)
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(JY))
-                      TEMP2 = CONJG(ALPHA*X(JX))
-                      IX = KX
-                      IY = KY
-                      DO 30 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                      A(J,J) = REAL(A(J,J)) +
-     +                         REAL(X(JX)*TEMP1+Y(JY)*TEMP2)
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when A is stored in the lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(J))
-                      TEMP2 = CONJG(ALPHA*X(J))
-                      A(J,J) = REAL(A(J,J)) +
-     +                         REAL(X(J)*TEMP1+Y(J)*TEMP2)
-                      DO 50 I = J + 1,N
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   50                 CONTINUE
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(JY))
-                      TEMP2 = CONJG(ALPHA*X(JX))
-                      A(J,J) = REAL(A(J,J)) +
-     +                         REAL(X(JX)*TEMP1+Y(JY)*TEMP2)
-                      IX = JX
-                      IY = JY
-                      DO 70 I = J + 1,N
-                          IX = IX + INCX
-                          IY = IY + INCY
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-   70                 CONTINUE
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CHER2 .
-      END
diff --git a/superlu/BLAS/cher2k.f b/superlu/BLAS/cher2k.f
deleted file mode 100644
index ace3c5d2..00000000
--- a/superlu/BLAS/cher2k.f
+++ /dev/null
@@ -1,442 +0,0 @@
-*> \brief \b CHER2K
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL BETA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHER2K  performs one of the hermitian rank 2k operations
-*>    C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,
-*> or
-*>    C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,
-*> where  alpha and beta  are scalars with  beta  real,  C is an  n by n
-*> hermitian matrix and  A and B  are  n by k matrices in the first case
-*> and  k by n  matrices in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'    C := alpha*A*B**H          +
-*>                                         conjg( alpha )*B*A**H +
-*>                                         beta*C.
-*>              TRANS = 'C' or 'c'    C := alpha*A**H*B          +
-*>                                         conjg( alpha )*B**H*A +
-*>                                         beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
-*>           matrices  A and B.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set,  they are assumed to be zero,  and on exit they
-*>           are set to zero.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*>  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
-*>     Ed Anderson, Cray Research Inc.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      REAL BETA
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      REAL ONE
-      PARAMETER (ONE=1.0E+0)
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.REAL(ZERO)) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.REAL(ZERO)) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      C(J,J) = BETA*REAL(C(J,J))
-                      DO 70 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*B**H + conjg( alpha )*B*A**H +
-*                   C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.REAL(ZERO)) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-                  ELSE
-                      C(J,J) = REAL(C(J,J))
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*CONJG(B(J,L))
-                          TEMP2 = CONJG(ALPHA*A(J,L))
-                          DO 110 I = 1,J - 1
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                          C(J,J) = REAL(C(J,J)) +
-     +                             REAL(A(J,L)*TEMP1+B(J,L)*TEMP2)
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.REAL(ZERO)) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-                  ELSE
-                      C(J,J) = REAL(C(J,J))
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*CONJG(B(J,L))
-                          TEMP2 = CONJG(ALPHA*A(J,L))
-                          DO 160 I = J + 1,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                          C(J,J) = REAL(C(J,J)) +
-     +                             REAL(A(J,L)*TEMP1+B(J,L)*TEMP2)
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**H*B + conjg( alpha )*B**H*A +
-*                   C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + CONJG(A(L,I))*B(L,J)
-                          TEMP2 = TEMP2 + CONJG(B(L,I))*A(L,J)
-  190                 CONTINUE
-                      IF (I.EQ.J) THEN
-                          IF (BETA.EQ.REAL(ZERO)) THEN
-                              C(J,J) = REAL(ALPHA*TEMP1+
-     +                                 CONJG(ALPHA)*TEMP2)
-                          ELSE
-                              C(J,J) = BETA*REAL(C(J,J)) +
-     +                                 REAL(ALPHA*TEMP1+
-     +                                 CONJG(ALPHA)*TEMP2)
-                          END IF
-                      ELSE
-                          IF (BETA.EQ.REAL(ZERO)) THEN
-                              C(I,J) = ALPHA*TEMP1 + CONJG(ALPHA)*TEMP2
-                          ELSE
-                              C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                                 CONJG(ALPHA)*TEMP2
-                          END IF
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + CONJG(A(L,I))*B(L,J)
-                          TEMP2 = TEMP2 + CONJG(B(L,I))*A(L,J)
-  220                 CONTINUE
-                      IF (I.EQ.J) THEN
-                          IF (BETA.EQ.REAL(ZERO)) THEN
-                              C(J,J) = REAL(ALPHA*TEMP1+
-     +                                 CONJG(ALPHA)*TEMP2)
-                          ELSE
-                              C(J,J) = BETA*REAL(C(J,J)) +
-     +                                 REAL(ALPHA*TEMP1+
-     +                                 CONJG(ALPHA)*TEMP2)
-                          END IF
-                      ELSE
-                          IF (BETA.EQ.REAL(ZERO)) THEN
-                              C(I,J) = ALPHA*TEMP1 + CONJG(ALPHA)*TEMP2
-                          ELSE
-                              C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                                 CONJG(ALPHA)*TEMP2
-                          END IF
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CHER2K.
-      END
diff --git a/superlu/BLAS/cherk.f b/superlu/BLAS/cherk.f
deleted file mode 100644
index 1c47e57b..00000000
--- a/superlu/BLAS/cherk.f
+++ /dev/null
@@ -1,396 +0,0 @@
-*> \brief \b CHERK
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHERK  performs one of the hermitian rank k operations
-*>    C := alpha*A*A**H + beta*C,
-*> or
-*>    C := alpha*A**H*A + beta*C,
-*> where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
-*> matrix and  A  is an  n by k  matrix in the  first case and a  k by n
-*> matrix in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**H + beta*C.
-*>              TRANS = 'C' or 'c'   C := alpha*A**H*A + beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
-*>           matrix A.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set,  they are assumed to be zero,  and on exit they
-*>           are set to zero.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*>  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
-*>     Ed Anderson, Cray Research Inc.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      C(J,J) = BETA*REAL(C(J,J))
-                      DO 70 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*A**H + beta*C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-                  ELSE
-                      C(J,J) = REAL(C(J,J))
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.CMPLX(ZERO)) THEN
-                          TEMP = ALPHA*CONJG(A(J,L))
-                          DO 110 I = 1,J - 1
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                          C(J,J) = REAL(C(J,J)) + REAL(TEMP*A(I,L))
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      C(J,J) = BETA*REAL(C(J,J))
-                      DO 150 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  ELSE
-                      C(J,J) = REAL(C(J,J))
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.CMPLX(ZERO)) THEN
-                          TEMP = ALPHA*CONJG(A(J,L))
-                          C(J,J) = REAL(C(J,J)) + REAL(TEMP*A(J,L))
-                          DO 160 I = J + 1,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**H*A + beta*C.
-          IF (UPPER) THEN
-              DO 220 J = 1,N
-                  DO 200 I = 1,J - 1
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-                  RTEMP = ZERO
-                  DO 210 L = 1,K
-                      RTEMP = RTEMP + CONJG(A(L,J))*A(L,J)
-  210             CONTINUE
-                  IF (BETA.EQ.ZERO) THEN
-                      C(J,J) = ALPHA*RTEMP
-                  ELSE
-                      C(J,J) = ALPHA*RTEMP + BETA*REAL(C(J,J))
-                  END IF
-  220         CONTINUE
-          ELSE
-              DO 260 J = 1,N
-                  RTEMP = ZERO
-                  DO 230 L = 1,K
-                      RTEMP = RTEMP + CONJG(A(L,J))*A(L,J)
-  230             CONTINUE
-                  IF (BETA.EQ.ZERO) THEN
-                      C(J,J) = ALPHA*RTEMP
-                  ELSE
-                      C(J,J) = ALPHA*RTEMP + BETA*REAL(C(J,J))
-                  END IF
-                  DO 250 I = J + 1,N
-                      TEMP = ZERO
-                      DO 240 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*A(L,J)
-  240                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  250             CONTINUE
-  260         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CHERK .
-      END
diff --git a/superlu/BLAS/chpmv.f b/superlu/BLAS/chpmv.f
deleted file mode 100644
index b182bfb9..00000000
--- a/superlu/BLAS/chpmv.f
+++ /dev/null
@@ -1,338 +0,0 @@
-*> \brief \b CHPMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHPMV  performs the matrix-vector operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 6
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when AP contains the upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  K = KK
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + CONJG(AP(K))*X(I)
-                      K = K + 1
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*REAL(AP(KK+J-1)) + ALPHA*TEMP2
-                  KK = KK + J
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 K = KK,KK + J - 2
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + CONJG(AP(K))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*REAL(AP(KK+J-1)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when AP contains the lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*REAL(AP(KK))
-                  K = KK + 1
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + CONJG(AP(K))*X(I)
-                      K = K + 1
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-                  KK = KK + (N-J+1)
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*REAL(AP(KK))
-                  IX = JX
-                  IY = JY
-                  DO 110 K = KK + 1,KK + N - J
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + CONJG(AP(K))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + (N-J+1)
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CHPMV .
-      END
diff --git a/superlu/BLAS/chpr.f b/superlu/BLAS/chpr.f
deleted file mode 100644
index 6212c043..00000000
--- a/superlu/BLAS/chpr.f
+++ /dev/null
@@ -1,279 +0,0 @@
-*> \brief \b CHPR
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHPR    performs the hermitian rank 1 operation
-*>    A := alpha*x*x**H + A,
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHPR  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set the start point in X if the increment is not unity.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when upper triangle is stored in AP.
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(J))
-                      K = KK
-                      DO 10 I = 1,J - 1
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   10                 CONTINUE
-                      AP(KK+J-1) = REAL(AP(KK+J-1)) + REAL(X(J)*TEMP)
-                  ELSE
-                      AP(KK+J-1) = REAL(AP(KK+J-1))
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(JX))
-                      IX = KX
-                      DO 30 K = KK,KK + J - 2
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                      AP(KK+J-1) = REAL(AP(KK+J-1)) + REAL(X(JX)*TEMP)
-                  ELSE
-                      AP(KK+J-1) = REAL(AP(KK+J-1))
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when lower triangle is stored in AP.
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(J))
-                      AP(KK) = REAL(AP(KK)) + REAL(TEMP*X(J))
-                      K = KK + 1
-                      DO 50 I = J + 1,N
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   50                 CONTINUE
-                  ELSE
-                      AP(KK) = REAL(AP(KK))
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(JX))
-                      AP(KK) = REAL(AP(KK)) + REAL(TEMP*X(JX))
-                      IX = JX
-                      DO 70 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          AP(K) = AP(K) + X(IX)*TEMP
-   70                 CONTINUE
-                  ELSE
-                      AP(KK) = REAL(AP(KK))
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CHPR  .
-      END
diff --git a/superlu/BLAS/chpr2.f b/superlu/BLAS/chpr2.f
deleted file mode 100644
index 3ca388a4..00000000
--- a/superlu/BLAS/chpr2.f
+++ /dev/null
@@ -1,318 +0,0 @@
-*> \brief \b CHPR2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CHPR2  performs the hermitian rank 2 operation
-*>    A := alpha*x*y**H + conjg( alpha )*y*x**H + A,
-*> where alpha is a scalar, x and y are n element vectors and A is an
-*> n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHPR2 ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when upper triangle is stored in AP.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(J))
-                      TEMP2 = CONJG(ALPHA*X(J))
-                      K = KK
-                      DO 10 I = 1,J - 1
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   10                 CONTINUE
-                      AP(KK+J-1) = REAL(AP(KK+J-1)) +
-     +                             REAL(X(J)*TEMP1+Y(J)*TEMP2)
-                  ELSE
-                      AP(KK+J-1) = REAL(AP(KK+J-1))
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(JY))
-                      TEMP2 = CONJG(ALPHA*X(JX))
-                      IX = KX
-                      IY = KY
-                      DO 30 K = KK,KK + J - 2
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                      AP(KK+J-1) = REAL(AP(KK+J-1)) +
-     +                             REAL(X(JX)*TEMP1+Y(JY)*TEMP2)
-                  ELSE
-                      AP(KK+J-1) = REAL(AP(KK+J-1))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when lower triangle is stored in AP.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(J))
-                      TEMP2 = CONJG(ALPHA*X(J))
-                      AP(KK) = REAL(AP(KK)) +
-     +                         REAL(X(J)*TEMP1+Y(J)*TEMP2)
-                      K = KK + 1
-                      DO 50 I = J + 1,N
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   50                 CONTINUE
-                  ELSE
-                      AP(KK) = REAL(AP(KK))
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(JY))
-                      TEMP2 = CONJG(ALPHA*X(JX))
-                      AP(KK) = REAL(AP(KK)) +
-     +                         REAL(X(JX)*TEMP1+Y(JY)*TEMP2)
-                      IX = JX
-                      IY = JY
-                      DO 70 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          IY = IY + INCY
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-   70                 CONTINUE
-                  ELSE
-                      AP(KK) = REAL(AP(KK))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CHPR2 .
-      END
diff --git a/superlu/BLAS/crotg.f b/superlu/BLAS/crotg.f
deleted file mode 100644
index 1cdb662e..00000000
--- a/superlu/BLAS/crotg.f
+++ /dev/null
@@ -1,74 +0,0 @@
-*> \brief \b CROTG
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX CA,CB,S
-*       REAL C
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CROTG determines a complex Givens rotation.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      REAL C
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (CABS(CA).EQ.0.) THEN
-         C = 0.
-         S = (1.,0.)
-         CA = CB
-      ELSE
-         SCALE = CABS(CA) + CABS(CB)
-         ALPHA = CA/CABS(CA)
-         C = CABS(CA)/NORM
-         S = ALPHA*CONJG(CB)/NORM
-         CA = ALPHA*NORM
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/cscal.f b/superlu/BLAS/cscal.f
deleted file mode 100644
index 1405a977..00000000
--- a/superlu/BLAS/cscal.f
+++ /dev/null
@@ -1,91 +0,0 @@
-*> \brief \b CSCAL
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX CA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    CSCAL scales a vector by a constant.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack,  3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         DO I = 1,N
-            CX(I) = CA*CX(I)
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            CX(I) = CA*CX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/csrot.f b/superlu/BLAS/csrot.f
deleted file mode 100644
index aa8564e7..00000000
--- a/superlu/BLAS/csrot.f
+++ /dev/null
@@ -1,153 +0,0 @@
-*> \brief \b CSROT
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       INTEGER           INCX, INCY, N
-*       REAL              C, S
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX           CX( * ), CY( * )
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CSROT applies a plane rotation, where the cos and sin (c and s) are real
-*> and the vectors cx and cy are complex.
-*> jack dongarra, linpack, 3/11/78.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the vectors cx and cy.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in,out] CX
-*> \verbatim
-*>          CX is COMPLEX array, dimension at least
-*>           ( 1 + ( N - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array CX must contain the n
-*>           element vector cx. On exit, CX is overwritten by the updated
-*>           vector cx.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           CX. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] CY
-*> \verbatim
-*>          CY is COMPLEX array, dimension at least
-*>           ( 1 + ( N - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array CY must contain the n
-*>           element vector cy. On exit, CY is overwritten by the updated
-*>           vector cy.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           CY. INCY must not be zero.
-*> \endverbatim
-*> \param[in] C
-*> \verbatim
-*>          C is REAL
-*>           On entry, C specifies the cosine, cos.
-*> \endverbatim
-*> \param[in] S
-*> \verbatim
-*>          S is REAL
-*>           On entry, S specifies the sine, sin.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      INTEGER           INCX, INCY, N
-      REAL              C, S
-*     ..
-*     .. Array Arguments ..
-      COMPLEX           CX( * ), CY( * )
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-      INTEGER           I, IX, IY
-      COMPLEX           CTEMP
-*     ..
-*     .. Executable Statements ..
-      IF( N.LE.0 )
-     $   RETURN
-      IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN
-*        code for both increments equal to 1
-         DO I = 1, N
-            CTEMP = C*CX( I ) + S*CY( I )
-            CY( I ) = C*CY( I ) - S*CX( I )
-            CX( I ) = CTEMP
-         END DO
-      ELSE
-*        code for unequal increments or equal increments not equal
-*          to 1
-         IX = 1
-         IY = 1
-         IF( INCX.LT.0 )
-     $      IX = ( -N+1 )*INCX + 1
-         IF( INCY.LT.0 )
-     $      IY = ( -N+1 )*INCY + 1
-         DO I = 1, N
-            CTEMP = C*CX( IX ) + S*CY( IY )
-            CY( IY ) = C*CY( IY ) - S*CX( IX )
-            CX( IX ) = CTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/csscal.f b/superlu/BLAS/csscal.f
deleted file mode 100644
index dc02654f..00000000
--- a/superlu/BLAS/csscal.f
+++ /dev/null
@@ -1,94 +0,0 @@
-*> \brief \b CSSCAL
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL SA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    CSSCAL scales a complex vector by a real constant.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      REAL SA
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         DO I = 1,N
-            CX(I) = CMPLX(SA*REAL(CX(I)),SA*AIMAG(CX(I)))
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            CX(I) = CMPLX(SA*REAL(CX(I)),SA*AIMAG(CX(I)))
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/cswap.f b/superlu/BLAS/cswap.f
deleted file mode 100644
index 369a294e..00000000
--- a/superlu/BLAS/cswap.f
+++ /dev/null
@@ -1,98 +0,0 @@
-*> \brief \b CSWAP
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>   CSWAP interchanges two vectors.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*       code for both increments equal to 1
-         DO I = 1,N
-            CTEMP = CX(I)
-            CX(I) = CY(I)
-            CY(I) = CTEMP
-         END DO
-      ELSE
-*       code for unequal increments or equal increments not equal
-*         to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CTEMP = CX(IX)
-            CX(IX) = CY(IY)
-            CY(IY) = CTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/csymm.f b/superlu/BLAS/csymm.f
deleted file mode 100644
index 906a5720..00000000
--- a/superlu/BLAS/csymm.f
+++ /dev/null
@@ -1,369 +0,0 @@
-*> \brief \b CSYMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CSYMM  performs one of the matrix-matrix operations
-*>    C := alpha*A*B + beta*C,
-*> or
-*>    C := alpha*B*A + beta*C,
-*> where  alpha and beta are scalars, A is a symmetric matrix and  B and
-*> C are m by n matrices.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  symmetric matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  symmetric  matrix   A  is  to  be
-*>           referenced as follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least max( 1, n ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     Set NROWA as the number of rows of A.
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     And when
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSAME(SIDE,'L')) THEN
-*        Form  C := alpha*A*B + beta*C.
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*B*A + beta*C.
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*A(J,J)
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*A(J,K)
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(J,K)
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-      RETURN
-*     End of CSYMM .
-      END
diff --git a/superlu/BLAS/csyr2k.f b/superlu/BLAS/csyr2k.f
deleted file mode 100644
index 1fdeadc0..00000000
--- a/superlu/BLAS/csyr2k.f
+++ /dev/null
@@ -1,396 +0,0 @@
-*> \brief \b CSYR2K
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CSYR2K  performs one of the symmetric rank 2k operations
-*>    C := alpha*A*B**T + alpha*B*A**T + beta*C,
-*> or
-*>    C := alpha*A**T*B + alpha*B**T*A + beta*C,
-*> where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
-*> and  A and B  are  n by k  matrices  in the  first  case  and  k by n
-*> matrices in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'    C := alpha*A*B**T + alpha*B*A**T +
-*>                                         beta*C.
-*>              TRANS = 'T' or 't'    C := alpha*A**T*B + alpha*B**T*A +
-*>                                         beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'T' or 't',  K  specifies  the number of rows of the
-*>           matrices  A and B.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'T'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*B**T + alpha*B*A**T + C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**T*B + alpha*B**T*A + C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CSYR2K.
-      END
diff --git a/superlu/BLAS/csyrk.f b/superlu/BLAS/csyrk.f
deleted file mode 100644
index c4494c5a..00000000
--- a/superlu/BLAS/csyrk.f
+++ /dev/null
@@ -1,363 +0,0 @@
-*> \brief \b CSYRK
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CSYRK  performs one of the symmetric rank k operations
-*>    C := alpha*A*A**T + beta*C,
-*> or
-*>    C := alpha*A**T*A + beta*C,
-*> where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
-*> and  A  is an  n by k  matrix in the first case and a  k by n  matrix
-*> in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**T + beta*C.
-*>              TRANS = 'T' or 't'   C := alpha*A**T*A + beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'T' or 't',  K  specifies  the number of rows of the
-*>           matrix A.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'T'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*A**T + beta*C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**T*A + beta*C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP = ZERO
-                      DO 220 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of CSYRK .
-      END
diff --git a/superlu/BLAS/ctbmv.f b/superlu/BLAS/ctbmv.f
deleted file mode 100644
index 1513c1a3..00000000
--- a/superlu/BLAS/ctbmv.f
+++ /dev/null
@@ -1,429 +0,0 @@
-*> \brief \b CTBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CTBMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX   too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*         Form  x := A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = KPLUS1 - J
-                          DO 10 I = MAX(1,J-K),J - 1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = KPLUS1 - J
-                          DO 30 I = MAX(1,J-K),J - 1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
-                      END IF
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = 1 - J
-                          DO 50 I = MIN(N,J+K),J + 1,-1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(1,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = 1 - J
-                          DO 70 I = MIN(N,J+K),J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(1,J)
-                      END IF
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x  or  x := A**H*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                          DO 90 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + A(L+I,J)*X(I)
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(KPLUS1,J))
-                          DO 100 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + CONJG(A(L+I,J))*X(I)
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      KX = KX - INCX
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                          DO 120 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + A(L+I,J)*X(IX)
-                              IX = IX - INCX
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(KPLUS1,J))
-                          DO 130 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + CONJG(A(L+I,J))*X(IX)
-                              IX = IX - INCX
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(1,J)
-                          DO 150 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + A(L+I,J)*X(I)
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(1,J))
-                          DO 160 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + CONJG(A(L+I,J))*X(I)
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      KX = KX + INCX
-                      IX = KX
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(1,J)
-                          DO 180 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + A(L+I,J)*X(IX)
-                              IX = IX + INCX
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(1,J))
-                          DO 190 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + CONJG(A(L+I,J))*X(IX)
-                              IX = IX + INCX
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of CTBMV .
-      END
diff --git a/superlu/BLAS/ctbsv.f b/superlu/BLAS/ctbsv.f
deleted file mode 100644
index f4cc3306..00000000
--- a/superlu/BLAS/ctbsv.f
+++ /dev/null
@@ -1,432 +0,0 @@
-*> \brief \b CTBSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CTBSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular band matrix, with ( k + 1 )
-*> diagonals.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed by sequentially with one pass through A.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,MAX(1,J-K),-1
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 40 J = N,1,-1
-                      KX = KX - INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J)
-                          TEMP = X(JX)
-                          DO 30 I = J - 1,MAX(1,J-K),-1
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX - INCX
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          L = 1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(1,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,MIN(N,J+K)
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      KX = KX + INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = 1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(1,J)
-                          TEMP = X(JX)
-                          DO 70 I = J + 1,MIN(N,J+K)
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX + INCX
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          DO 90 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - A(L+I,J)*X(I)
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      ELSE
-                          DO 100 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - CONJG(A(L+I,J))*X(I)
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(KPLUS1,J))
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          DO 120 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - A(L+I,J)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      ELSE
-                          DO 130 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - CONJG(A(L+I,J))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(KPLUS1,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          DO 150 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - A(L+I,J)*X(I)
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      ELSE
-                          DO 160 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - CONJG(A(L+I,J))*X(I)
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(1,J))
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          DO 180 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - A(L+I,J)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      ELSE
-                          DO 190 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - CONJG(A(L+I,J))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(1,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of CTBSV .
-      END
diff --git a/superlu/BLAS/ctpmv.f b/superlu/BLAS/ctpmv.f
deleted file mode 100644
index 4582acc9..00000000
--- a/superlu/BLAS/ctpmv.f
+++ /dev/null
@@ -1,388 +0,0 @@
-*> \brief \b CTPMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CTPMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*        Form  x:= A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K + 1
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK+J-1)
-                      END IF
-                      KK = KK + J
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 K = KK,KK + J - 2
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1)
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K - 1
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK-N+J)
-                      END IF
-                      KK = KK - (N-J+1)
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 K = KK,KK - (N- (J+1)),-1
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J)
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x  or  x := A**H*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK - 1
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 90 I = J - 1,1,-1
-                              TEMP = TEMP + AP(K)*X(I)
-                              K = K - 1
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK))
-                          DO 100 I = J - 1,1,-1
-                              TEMP = TEMP + CONJG(AP(K))*X(I)
-                              K = K - 1
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-                      KK = KK - J
-  110             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 120 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + AP(K)*X(IX)
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK))
-                          DO 130 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + CONJG(AP(K))*X(IX)
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - J
-  140             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      K = KK + 1
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 150 I = J + 1,N
-                              TEMP = TEMP + AP(K)*X(I)
-                              K = K + 1
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK))
-                          DO 160 I = J + 1,N
-                              TEMP = TEMP + CONJG(AP(K))*X(I)
-                              K = K + 1
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-                      KK = KK + (N-J+1)
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 180 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              TEMP = TEMP + AP(K)*X(IX)
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK))
-                          DO 190 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              TEMP = TEMP + CONJG(AP(K))*X(IX)
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of CTPMV .
-      END
diff --git a/superlu/BLAS/ctpsv.f b/superlu/BLAS/ctpsv.f
deleted file mode 100644
index 2fcd19ba..00000000
--- a/superlu/BLAS/ctpsv.f
+++ /dev/null
@@ -1,390 +0,0 @@
-*> \brief \b CTPSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CTPSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix, supplied in packed form.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK - 1
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K - 1
-   10                     CONTINUE
-                      END IF
-                      KK = KK - J
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK + 1
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K + 1
-   50                     CONTINUE
-                      END IF
-                      KK = KK + (N-J+1)
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      K = KK
-                      IF (NOCONJ) THEN
-                          DO 90 I = 1,J - 1
-                              TEMP = TEMP - AP(K)*X(I)
-                              K = K + 1
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      ELSE
-                          DO 100 I = 1,J - 1
-                              TEMP = TEMP - CONJG(AP(K))*X(I)
-                              K = K + 1
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK+J-1))
-                      END IF
-                      X(J) = TEMP
-                      KK = KK + J
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      IF (NOCONJ) THEN
-                          DO 120 K = KK,KK + J - 2
-                              TEMP = TEMP - AP(K)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      ELSE
-                          DO 130 K = KK,KK + J - 2
-                              TEMP = TEMP - CONJG(AP(K))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK+J-1))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + J
-  140             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK
-                      IF (NOCONJ) THEN
-                          DO 150 I = N,J + 1,-1
-                              TEMP = TEMP - AP(K)*X(I)
-                              K = K - 1
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      ELSE
-                          DO 160 I = N,J + 1,-1
-                              TEMP = TEMP - CONJG(AP(K))*X(I)
-                              K = K - 1
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK-N+J))
-                      END IF
-                      X(J) = TEMP
-                      KK = KK - (N-J+1)
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      IF (NOCONJ) THEN
-                          DO 180 K = KK,KK - (N- (J+1)),-1
-                              TEMP = TEMP - AP(K)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      ELSE
-                          DO 190 K = KK,KK - (N- (J+1)),-1
-                              TEMP = TEMP - CONJG(AP(K))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK-N+J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of CTPSV .
-      END
diff --git a/superlu/BLAS/ctrmm.f b/superlu/BLAS/ctrmm.f
deleted file mode 100644
index a23fb27c..00000000
--- a/superlu/BLAS/ctrmm.f
+++ /dev/null
@@ -1,452 +0,0 @@
-*> \brief \b CTRMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CTRMM  performs one of the matrix-matrix operations
-*>    B := alpha*op( A )*B,   or   B := alpha*B*op( A )
-*> where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>    op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE specifies whether  op( A ) multiplies B from
-*>           the left or right as follows:
-*>              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
-*>              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>              TRANSA = 'C' or 'c'   op( A ) = A**H.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, k ), where k is m
-*>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*> \param[in,out] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain the matrix  B,  and  on exit  is overwritten  by the
-*>           transformed matrix.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     Test the input parameters.
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-          INFO = 2
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-          INFO = 9
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*     And when
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*A*B.
-              IF (UPPER) THEN
-                  DO 50 J = 1,N
-                      DO 40 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              DO 30 I = 1,K - 1
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   30                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP*A(K,K)
-                              B(K,J) = TEMP
-                          END IF
-   40                 CONTINUE
-   50             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              B(K,J) = TEMP
-                              IF (NOUNIT) B(K,J) = B(K,J)*A(K,K)
-                              DO 60 I = K + 1,M
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   60                         CONTINUE
-                          END IF
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*A**T*B   or   B := alpha*A**H*B.
-              IF (UPPER) THEN
-                  DO 120 J = 1,N
-                      DO 110 I = M,1,-1
-                          TEMP = B(I,J)
-                          IF (NOCONJ) THEN
-                              IF (NOUNIT) TEMP = TEMP*A(I,I)
-                              DO 90 K = 1,I - 1
-                                  TEMP = TEMP + A(K,I)*B(K,J)
-   90                         CONTINUE
-                          ELSE
-                              IF (NOUNIT) TEMP = TEMP*CONJG(A(I,I))
-                              DO 100 K = 1,I - 1
-                                  TEMP = TEMP + CONJG(A(K,I))*B(K,J)
-  100                         CONTINUE
-                          END IF
-                          B(I,J) = ALPHA*TEMP
-  110                 CONTINUE
-  120             CONTINUE
-              ELSE
-                  DO 160 J = 1,N
-                      DO 150 I = 1,M
-                          TEMP = B(I,J)
-                          IF (NOCONJ) THEN
-                              IF (NOUNIT) TEMP = TEMP*A(I,I)
-                              DO 130 K = I + 1,M
-                                  TEMP = TEMP + A(K,I)*B(K,J)
-  130                         CONTINUE
-                          ELSE
-                              IF (NOUNIT) TEMP = TEMP*CONJG(A(I,I))
-                              DO 140 K = I + 1,M
-                                  TEMP = TEMP + CONJG(A(K,I))*B(K,J)
-  140                         CONTINUE
-                          END IF
-                          B(I,J) = ALPHA*TEMP
-  150                 CONTINUE
-  160             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*B*A.
-              IF (UPPER) THEN
-                  DO 200 J = N,1,-1
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 170 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  170                 CONTINUE
-                      DO 190 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 180 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  180                         CONTINUE
-                          END IF
-  190                 CONTINUE
-  200             CONTINUE
-              ELSE
-                  DO 240 J = 1,N
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 210 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  210                 CONTINUE
-                      DO 230 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 220 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  220                         CONTINUE
-                          END IF
-  230                 CONTINUE
-  240             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*B*A**T   or   B := alpha*B*A**H.
-              IF (UPPER) THEN
-                  DO 280 K = 1,N
-                      DO 260 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = ALPHA*A(J,K)
-                              ELSE
-                                  TEMP = ALPHA*CONJG(A(J,K))
-                              END IF
-                              DO 250 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  250                         CONTINUE
-                          END IF
-  260                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = TEMP*A(K,K)
-                          ELSE
-                              TEMP = TEMP*CONJG(A(K,K))
-                          END IF
-                      END IF
-                      IF (TEMP.NE.ONE) THEN
-                          DO 270 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  270                     CONTINUE
-                      END IF
-  280             CONTINUE
-              ELSE
-                  DO 320 K = N,1,-1
-                      DO 300 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = ALPHA*A(J,K)
-                              ELSE
-                                  TEMP = ALPHA*CONJG(A(J,K))
-                              END IF
-                              DO 290 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  290                         CONTINUE
-                          END IF
-  300                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = TEMP*A(K,K)
-                          ELSE
-                              TEMP = TEMP*CONJG(A(K,K))
-                          END IF
-                      END IF
-                      IF (TEMP.NE.ONE) THEN
-                          DO 310 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  310                     CONTINUE
-                      END IF
-  320             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of CTRMM .
-      END
diff --git a/superlu/BLAS/ctrmv.f b/superlu/BLAS/ctrmv.f
deleted file mode 100644
index 8795e870..00000000
--- a/superlu/BLAS/ctrmv.f
+++ /dev/null
@@ -1,373 +0,0 @@
-*> \brief \b CTRMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CTRMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 6
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*        Form  x := A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*A(I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 I = 1,J - 1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*A(I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 I = N,J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x  or  x := A**H*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 90 I = J - 1,1,-1
-                              TEMP = TEMP + A(I,J)*X(I)
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J))
-                          DO 100 I = J - 1,1,-1
-                              TEMP = TEMP + CONJG(A(I,J))*X(I)
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 120 I = J - 1,1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + A(I,J)*X(IX)
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J))
-                          DO 130 I = J - 1,1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + CONJG(A(I,J))*X(IX)
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 150 I = J + 1,N
-                              TEMP = TEMP + A(I,J)*X(I)
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J))
-                          DO 160 I = J + 1,N
-                              TEMP = TEMP + CONJG(A(I,J))*X(I)
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 180 I = J + 1,N
-                              IX = IX + INCX
-                              TEMP = TEMP + A(I,J)*X(IX)
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J))
-                          DO 190 I = J + 1,N
-                              IX = IX + INCX
-                              TEMP = TEMP + CONJG(A(I,J))*X(IX)
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of CTRMV .
-      END
diff --git a/superlu/BLAS/ctrsm.f b/superlu/BLAS/ctrsm.f
deleted file mode 100644
index 7ee5c947..00000000
--- a/superlu/BLAS/ctrsm.f
+++ /dev/null
@@ -1,477 +0,0 @@
-*> \brief \b CTRSM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CTRSM  solves one of the matrix equations
-*>    op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
-*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>    op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H.
-*> The matrix X is overwritten on B.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry, SIDE specifies whether op( A ) appears on the left
-*>           or right of X as follows:
-*>              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
-*>              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>              TRANSA = 'C' or 'c'   op( A ) = A**H.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, k ),
-*>           where k is m when SIDE = 'L' or 'l'
-*>             and k is n when SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*> \param[in,out] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain  the  right-hand  side  matrix  B,  and  on exit  is
-*>           overwritten by the solution matrix  X.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     Test the input parameters.
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-          INFO = 2
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-          INFO = 9
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*     And when
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*inv( A )*B.
-              IF (UPPER) THEN
-                  DO 60 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 30 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   30                     CONTINUE
-                      END IF
-                      DO 50 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 40 I = 1,K - 1
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   40                         CONTINUE
-                          END IF
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 100 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 70 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   70                     CONTINUE
-                      END IF
-                      DO 90 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 80 I = K + 1,M
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   80                         CONTINUE
-                          END IF
-   90                 CONTINUE
-  100             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*inv( A**T )*B
-*           or    B := alpha*inv( A**H )*B.
-              IF (UPPER) THEN
-                  DO 140 J = 1,N
-                      DO 130 I = 1,M
-                          TEMP = ALPHA*B(I,J)
-                          IF (NOCONJ) THEN
-                              DO 110 K = 1,I - 1
-                                  TEMP = TEMP - A(K,I)*B(K,J)
-  110                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          ELSE
-                              DO 120 K = 1,I - 1
-                                  TEMP = TEMP - CONJG(A(K,I))*B(K,J)
-  120                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/CONJG(A(I,I))
-                          END IF
-                          B(I,J) = TEMP
-  130                 CONTINUE
-  140             CONTINUE
-              ELSE
-                  DO 180 J = 1,N
-                      DO 170 I = M,1,-1
-                          TEMP = ALPHA*B(I,J)
-                          IF (NOCONJ) THEN
-                              DO 150 K = I + 1,M
-                                  TEMP = TEMP - A(K,I)*B(K,J)
-  150                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          ELSE
-                              DO 160 K = I + 1,M
-                                  TEMP = TEMP - CONJG(A(K,I))*B(K,J)
-  160                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/CONJG(A(I,I))
-                          END IF
-                          B(I,J) = TEMP
-  170                 CONTINUE
-  180             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*B*inv( A ).
-              IF (UPPER) THEN
-                  DO 230 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 190 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  190                     CONTINUE
-                      END IF
-                      DO 210 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 200 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  200                         CONTINUE
-                          END IF
-  210                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 220 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  220                     CONTINUE
-                      END IF
-  230             CONTINUE
-              ELSE
-                  DO 280 J = N,1,-1
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 240 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  240                     CONTINUE
-                      END IF
-                      DO 260 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 250 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  250                         CONTINUE
-                          END IF
-  260                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 270 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  270                     CONTINUE
-                      END IF
-  280             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*B*inv( A**T )
-*           or    B := alpha*B*inv( A**H ).
-              IF (UPPER) THEN
-                  DO 330 K = N,1,-1
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = ONE/A(K,K)
-                          ELSE
-                              TEMP = ONE/CONJG(A(K,K))
-                          END IF
-                          DO 290 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  290                     CONTINUE
-                      END IF
-                      DO 310 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = A(J,K)
-                              ELSE
-                                  TEMP = CONJG(A(J,K))
-                              END IF
-                              DO 300 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  300                         CONTINUE
-                          END IF
-  310                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 320 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  320                     CONTINUE
-                      END IF
-  330             CONTINUE
-              ELSE
-                  DO 380 K = 1,N
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = ONE/A(K,K)
-                          ELSE
-                              TEMP = ONE/CONJG(A(K,K))
-                          END IF
-                          DO 340 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  340                     CONTINUE
-                      END IF
-                      DO 360 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = A(J,K)
-                              ELSE
-                                  TEMP = CONJG(A(J,K))
-                              END IF
-                              DO 350 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  350                         CONTINUE
-                          END IF
-  360                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 370 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  370                     CONTINUE
-                      END IF
-  380             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of CTRSM .
-      END
diff --git a/superlu/BLAS/ctrsv.f b/superlu/BLAS/ctrsv.f
deleted file mode 100644
index 7981a21d..00000000
--- a/superlu/BLAS/ctrsv.f
+++ /dev/null
@@ -1,375 +0,0 @@
-*> \brief \b CTRSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> CTRSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 6
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*A(I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 I = J - 1,1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*A(I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 I = J + 1,N
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          DO 90 I = 1,J - 1
-                              TEMP = TEMP - A(I,J)*X(I)
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 100 I = 1,J - 1
-                              TEMP = TEMP - CONJG(A(I,J))*X(I)
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J))
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      IX = KX
-                      TEMP = X(JX)
-                      IF (NOCONJ) THEN
-                          DO 120 I = 1,J - 1
-                              TEMP = TEMP - A(I,J)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 130 I = 1,J - 1
-                              TEMP = TEMP - CONJG(A(I,J))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          DO 150 I = N,J + 1,-1
-                              TEMP = TEMP - A(I,J)*X(I)
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 160 I = N,J + 1,-1
-                              TEMP = TEMP - CONJG(A(I,J))*X(I)
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J))
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      IX = KX
-                      TEMP = X(JX)
-                      IF (NOCONJ) THEN
-                          DO 180 I = N,J + 1,-1
-                              TEMP = TEMP - A(I,J)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 190 I = N,J + 1,-1
-                              TEMP = TEMP - CONJG(A(I,J))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of CTRSV .
-      END
diff --git a/superlu/BLAS/dasum.f b/superlu/BLAS/dasum.f
deleted file mode 100644
index fd3d9104..00000000
--- a/superlu/BLAS/dasum.f
+++ /dev/null
@@ -1,111 +0,0 @@
-*> \brief \b DASUM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    DASUM takes the sum of the absolute values.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      DASUM = 0.0d0
-      DTEMP = 0.0d0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-*        clean-up loop
-         M = MOD(N,6)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DTEMP = DTEMP + DABS(DX(I))
-            END DO
-            IF (N.LT.6) THEN
-               DASUM = DTEMP
-               RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,6
-            DTEMP = DTEMP + DABS(DX(I)) + DABS(DX(I+1)) +
-     $              DABS(DX(I+2)) + DABS(DX(I+3)) +
-     $              DABS(DX(I+4)) + DABS(DX(I+5))
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            DTEMP = DTEMP + DABS(DX(I))
-         END DO
-      END IF
-      DASUM = DTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/daxpy.f b/superlu/BLAS/daxpy.f
deleted file mode 100644
index 5203e50c..00000000
--- a/superlu/BLAS/daxpy.f
+++ /dev/null
@@ -1,115 +0,0 @@
-*> \brief \b DAXPY
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    DAXPY constant times a vector plus a vector.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (DA.EQ.0.0d0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-*        clean-up loop
-         M = MOD(N,4)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DY(I) = DY(I) + DA*DX(I)
-            END DO
-         END IF
-         IF (N.LT.4) RETURN
-         MP1 = M + 1
-         DO I = MP1,N,4
-            DY(I) = DY(I) + DA*DX(I)
-            DY(I+1) = DY(I+1) + DA*DX(I+1)
-            DY(I+2) = DY(I+2) + DA*DX(I+2)
-            DY(I+3) = DY(I+3) + DA*DX(I+3)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-          DY(IY) = DY(IY) + DA*DX(IX)
-          IX = IX + INCX
-          IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/dcabs1.f b/superlu/BLAS/dcabs1.f
deleted file mode 100644
index d71fe7af..00000000
--- a/superlu/BLAS/dcabs1.f
+++ /dev/null
@@ -1,58 +0,0 @@
-*> \brief \b DCABS1
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 Z
-*       ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      COMPLEX*16 Z
-*     ..
-*     ..
-*  =====================================================================
-*     .. Intrinsic Functions ..
-      DCABS1 = ABS(DBLE(Z)) + ABS(DIMAG(Z))
-      RETURN
-      END
diff --git a/superlu/BLAS/dcopy.f b/superlu/BLAS/dcopy.f
deleted file mode 100644
index bbc38a75..00000000
--- a/superlu/BLAS/dcopy.f
+++ /dev/null
@@ -1,115 +0,0 @@
-*> \brief \b DCOPY
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    DCOPY copies a vector, x, to a vector, y.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-*        clean-up loop
-         M = MOD(N,7)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DY(I) = DX(I)
-            END DO
-            IF (N.LT.7) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,7
-            DY(I) = DX(I)
-            DY(I+1) = DX(I+1)
-            DY(I+2) = DX(I+2)
-            DY(I+3) = DX(I+3)
-            DY(I+4) = DX(I+4)
-            DY(I+5) = DX(I+5)
-            DY(I+6) = DX(I+6)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DY(IY) = DX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/ddot.f b/superlu/BLAS/ddot.f
deleted file mode 100644
index 1aea8240..00000000
--- a/superlu/BLAS/ddot.f
+++ /dev/null
@@ -1,117 +0,0 @@
-*> \brief \b DDOT
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    DDOT forms the dot product of two vectors.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      DDOT = 0.0d0
-      DTEMP = 0.0d0
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-*        clean-up loop
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DTEMP = DTEMP + DX(I)*DY(I)
-            END DO
-            IF (N.LT.5) THEN
-               DDOT=DTEMP
-            RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-          DTEMP = DTEMP + DX(I)*DY(I) + DX(I+1)*DY(I+1) +
-     $            DX(I+2)*DY(I+2) + DX(I+3)*DY(I+3) + DX(I+4)*DY(I+4)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DTEMP = DTEMP + DX(IX)*DY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      DDOT = DTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/dgbmv.f b/superlu/BLAS/dgbmv.f
deleted file mode 100644
index 3769e18b..00000000
--- a/superlu/BLAS/dgbmv.f
+++ /dev/null
@@ -1,370 +0,0 @@
-*> \brief \b DGBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DGBMV  performs one of the matrix-vector operations
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n band matrix, with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>           On entry, KL specifies the number of sub-diagonals of the
-*>           matrix A. KL must satisfy  0 .le. KL.
-*> \endverbatim
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>           On entry, KU specifies the number of super-diagonals of the
-*>           matrix A. KU must satisfy  0 .le. KU.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading ( kl + ku + 1 ) by n part of the
-*>           array A must contain the matrix of coefficients, supplied
-*>           column by column, with the leading diagonal of the matrix in
-*>           row ( ku + 1 ) of the array, the first super-diagonal
-*>           starting at position 2 in row ku, the first sub-diagonal
-*>           starting at position 1 in row ( ku + 2 ), and so on.
-*>           Elements in the array A that do not correspond to elements
-*>           in the band matrix (such as the top left ku by ku triangle)
-*>           are not referenced.
-*>           The following program segment will transfer a band matrix
-*>           from conventional full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    K = KU + 1 - J
-*>                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
-*>                       A( K + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( kl + ku + 1 ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (KL.LT.0) THEN
-          INFO = 4
-      ELSE IF (KU.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (KL+KU+1)) THEN
-          INFO = 8
-          INFO = 10
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the band part of A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      KUP1 = KU + 1
-*        Form  y := alpha*A*x + y.
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  K = KUP1 - J
-                  DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(I) = Y(I) + TEMP*A(K+I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  K = KUP1 - J
-                  DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(IY) = Y(IY) + TEMP*A(K+I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-                  IF (J.GT.KU) KY = KY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y := alpha*A**T*x + y.
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 100 J = 1,N
-                  TEMP = ZERO
-                  K = KUP1 - J
-                  DO 90 I = MAX(1,J-KU),MIN(M,J+KL)
-                      TEMP = TEMP + A(K+I,J)*X(I)
-   90             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  100         CONTINUE
-          ELSE
-              DO 120 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  K = KUP1 - J
-                  DO 110 I = MAX(1,J-KU),MIN(M,J+KL)
-                      TEMP = TEMP + A(K+I,J)*X(IX)
-                      IX = IX + INCX
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-                  IF (J.GT.KU) KX = KX + INCX
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DGBMV .
-      END
diff --git a/superlu/BLAS/dgemm.f b/superlu/BLAS/dgemm.f
deleted file mode 100644
index 5c5a2ac2..00000000
--- a/superlu/BLAS/dgemm.f
+++ /dev/null
@@ -1,384 +0,0 @@
-*> \brief \b DGEMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DGEMM  performs one of the matrix-matrix operations
-*>    C := alpha*op( A )*op( B ) + beta*C,
-*> where  op( X ) is one of
-*>    op( X ) = X   or   op( X ) = X**T,
-*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
-*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n',  op( A ) = A.
-*>              TRANSA = 'T' or 't',  op( A ) = A**T.
-*>              TRANSA = 'C' or 'c',  op( A ) = A**T.
-*> \endverbatim
-*> \param[in] TRANSB
-*> \verbatim
-*>          TRANSB is CHARACTER*1
-*>           On entry, TRANSB specifies the form of op( B ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSB = 'N' or 'n',  op( B ) = B.
-*>              TRANSB = 'T' or 't',  op( B ) = B**T.
-*>              TRANSB = 'C' or 'c',  op( B ) = B**T.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies  the number  of rows  of the  matrix
-*>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N  specifies the number  of columns of the matrix
-*>           op( B ) and the number of columns of the matrix C. N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry,  K  specifies  the number of columns of the matrix
-*>           op( A ) and the number of rows of the matrix op( B ). K must
-*>           be at least  zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-*>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by m  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
-*>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-*>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  n by k  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
-*>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n  matrix
-*>           ( alpha*op( A )*op( B ) + beta*C ).
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
-*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
-*     and  columns of  A  and the  number of  rows  of  B  respectively.
-      IF (NOTA) THEN
-          NROWA = M
-          NCOLA = K
-      ELSE
-          NROWA = K
-          NCOLA = M
-      END IF
-      IF (NOTB) THEN
-          NROWB = K
-      ELSE
-          NROWB = N
-      END IF
-*     Test the input parameters.
-      INFO = 0
-     +    (.NOT.LSAME(TRANSA,'T'))) THEN
-          INFO = 1
-     +         (.NOT.LSAME(TRANSB,'T'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-          INFO = 8
-          INFO = 10
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And if
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (NOTB) THEN
-          IF (NOTA) THEN
-*           Form  C := alpha*A*B + beta*C.
-              DO 90 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 50 I = 1,M
-                          C(I,J) = ZERO
-   50                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 60 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-   60                 CONTINUE
-                  END IF
-                  DO 80 L = 1,K
-                      TEMP = ALPHA*B(L,J)
-                      DO 70 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                 CONTINUE
-   80             CONTINUE
-   90         CONTINUE
-          ELSE
-*           Form  C := alpha*A**T*B + beta*C
-              DO 120 J = 1,N
-                  DO 110 I = 1,M
-                      TEMP = ZERO
-                      DO 100 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(L,J)
-  100                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  110             CONTINUE
-  120         CONTINUE
-          END IF
-      ELSE
-          IF (NOTA) THEN
-*           Form  C := alpha*A*B**T + beta*C
-              DO 170 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 130 I = 1,M
-                          C(I,J) = ZERO
-  130                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 140 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  140                 CONTINUE
-                  END IF
-                  DO 160 L = 1,K
-                      TEMP = ALPHA*B(J,L)
-                      DO 150 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  150                 CONTINUE
-  160             CONTINUE
-  170         CONTINUE
-          ELSE
-*           Form  C := alpha*A**T*B**T + beta*C
-              DO 200 J = 1,N
-                  DO 190 I = 1,M
-                      TEMP = ZERO
-                      DO 180 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(J,L)
-  180                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  190             CONTINUE
-  200         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DGEMM .
-      END
diff --git a/superlu/BLAS/dgemv.f b/superlu/BLAS/dgemv.f
deleted file mode 100644
index dd14c350..00000000
--- a/superlu/BLAS/dgemv.f
+++ /dev/null
@@ -1,330 +0,0 @@
-*> \brief \b DGEMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DGEMV  performs one of the matrix-vector operations
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry with BETA non-zero, the incremented array Y
-*>           must contain the vector y. On exit, Y is overwritten by the
-*>           updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-          INFO = 6
-          INFO = 8
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-*        Form  y := alpha*A*x + y.
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  DO 50 I = 1,M
-                      Y(I) = Y(I) + TEMP*A(I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  DO 70 I = 1,M
-                      Y(IY) = Y(IY) + TEMP*A(I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y := alpha*A**T*x + y.
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 100 J = 1,N
-                  TEMP = ZERO
-                  DO 90 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(I)
-   90             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  100         CONTINUE
-          ELSE
-              DO 120 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  DO 110 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(IX)
-                      IX = IX + INCX
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DGEMV .
-      END
diff --git a/superlu/BLAS/dger.f b/superlu/BLAS/dger.f
deleted file mode 100644
index 289141e8..00000000
--- a/superlu/BLAS/dger.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b DGER
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DGER   performs the rank 1 operation
-*>    A := alpha*x*y**T + A,
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DGER  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-      RETURN
-*     End of DGER  .
-      END
diff --git a/superlu/BLAS/dnrm2.f b/superlu/BLAS/dnrm2.f
deleted file mode 100644
index 0d7062fd..00000000
--- a/superlu/BLAS/dnrm2.f
+++ /dev/null
@@ -1,112 +0,0 @@
-*> \brief \b DNRM2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>    DNRM2 := sqrt( x'*x )
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to DLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE IF (N.EQ.1) THEN
-          NORM = ABS(X(1))
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (X(IX).NE.ZERO) THEN
-                  ABSXI = ABS(X(IX))
-                  IF (SCALE.LT.ABSXI) THEN
-                      SSQ = ONE + SSQ* (SCALE/ABSXI)**2
-                      SCALE = ABSXI
-                  ELSE
-                      SSQ = SSQ + (ABSXI/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-      DNRM2 = NORM
-      RETURN
-*     End of DNRM2.
-      END
diff --git a/superlu/BLAS/drot.f b/superlu/BLAS/drot.f
deleted file mode 100644
index baaae5c9..00000000
--- a/superlu/BLAS/drot.f
+++ /dev/null
@@ -1,101 +0,0 @@
-*> \brief \b DROT
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    DROT applies a plane rotation.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*       code for both increments equal to 1
-         DO I = 1,N
-            DTEMP = C*DX(I) + S*DY(I)
-            DY(I) = C*DY(I) - S*DX(I)
-            DX(I) = DTEMP
-         END DO
-      ELSE
-*       code for unequal increments or equal increments not equal
-*         to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DTEMP = C*DX(IX) + S*DY(IY)
-            DY(IY) = C*DY(IY) - S*DX(IX)
-            DX(IX) = DTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/drotg.f b/superlu/BLAS/drotg.f
deleted file mode 100644
index 85d04cd8..00000000
--- a/superlu/BLAS/drotg.f
+++ /dev/null
@@ -1,86 +0,0 @@
-*> \brief \b DROTG
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    DROTG construct givens plane rotation.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      ROE = DB
-      SCALE = DABS(DA) + DABS(DB)
-      IF (SCALE.EQ.0.0d0) THEN
-         C = 1.0d0
-         S = 0.0d0
-         R = 0.0d0
-         Z = 0.0d0
-      ELSE
-         R = SCALE*DSQRT((DA/SCALE)**2+ (DB/SCALE)**2)
-         R = DSIGN(1.0d0,ROE)*R
-         C = DA/R
-         S = DB/R
-         Z = 1.0d0
-         IF (DABS(DA).GT.DABS(DB)) Z = S
-         IF (DABS(DB).GE.DABS(DA) .AND. C.NE.0.0d0) Z = 1.0d0/C
-      END IF
-      DA = R
-      DB = Z
-      RETURN
-      END
diff --git a/superlu/BLAS/drotm.f b/superlu/BLAS/drotm.f
deleted file mode 100644
index b006dbd5..00000000
--- a/superlu/BLAS/drotm.f
+++ /dev/null
@@ -1,202 +0,0 @@
-*> \brief \b DROTM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    (DY**T)
-*>    DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE
-*>    DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0
-*>      (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0)
-*>    H=(          )    (          )    (          )    (          )
-*>      (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0).
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*> \param[in,out] DX
-*> \verbatim
-*>          DX is DOUBLE PRECISION array, dimension N
-*>         double precision vector with N elements
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>         storage spacing between elements of DX
-*> \endverbatim
-*> \param[in,out] DY
-*> \verbatim
-*>          DY is DOUBLE PRECISION array, dimension N
-*>         double precision vector with N elements
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>         storage spacing between elements of DY
-*> \endverbatim
-*> \param[in,out] DPARAM
-*> \verbatim
-*>          DPARAM is DOUBLE PRECISION array, dimension 5
-*>     DPARAM(1)=DFLAG
-*>     DPARAM(2)=DH11
-*>     DPARAM(3)=DH21
-*>     DPARAM(4)=DH12
-*>     DPARAM(5)=DH22
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Data statements ..
-      DATA ZERO,TWO/0.D0,2.D0/
-*     ..
-      DFLAG = DPARAM(1)
-         NSTEPS = N*INCX
-            DH11 = DPARAM(2)
-            DH12 = DPARAM(4)
-            DH21 = DPARAM(3)
-            DH22 = DPARAM(5)
-            DO I = 1,NSTEPS,INCX
-               W = DX(I)
-               Z = DY(I)
-               DX(I) = W*DH11 + Z*DH12
-               DY(I) = W*DH21 + Z*DH22
-            END DO
-            DH12 = DPARAM(4)
-            DH21 = DPARAM(3)
-            DO I = 1,NSTEPS,INCX
-               W = DX(I)
-               Z = DY(I)
-               DX(I) = W + Z*DH12
-               DY(I) = W*DH21 + Z
-            END DO
-         ELSE
-            DH11 = DPARAM(2)
-            DH22 = DPARAM(5)
-            DO I = 1,NSTEPS,INCX
-               W = DX(I)
-               Z = DY(I)
-               DX(I) = W*DH11 + Z
-               DY(I) = -W + DH22*Z
-            END DO
-         END IF
-      ELSE
-         KX = 1
-         KY = 1
-         IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-         IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-            DH11 = DPARAM(2)
-            DH12 = DPARAM(4)
-            DH21 = DPARAM(3)
-            DH22 = DPARAM(5)
-            DO I = 1,N
-               W = DX(KX)
-               Z = DY(KY)
-               DX(KX) = W*DH11 + Z*DH12
-               DY(KY) = W*DH21 + Z*DH22
-               KX = KX + INCX
-               KY = KY + INCY
-            END DO
-            DH12 = DPARAM(4)
-            DH21 = DPARAM(3)
-            DO I = 1,N
-               W = DX(KX)
-               Z = DY(KY)
-               DX(KX) = W + Z*DH12
-               DY(KY) = W*DH21 + Z
-               KX = KX + INCX
-               KY = KY + INCY
-            END DO
-         ELSE
-             DH11 = DPARAM(2)
-             DH22 = DPARAM(5)
-             DO I = 1,N
-                W = DX(KX)
-                Z = DY(KY)
-                DX(KX) = W*DH11 + Z
-                DY(KY) = -W + DH22*Z
-                KX = KX + INCX
-                KY = KY + INCY
-            END DO
-         END IF
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/drotmg.f b/superlu/BLAS/drotmg.f
deleted file mode 100644
index 1fb025fa..00000000
--- a/superlu/BLAS/drotmg.f
+++ /dev/null
@@ -1,251 +0,0 @@
-*> \brief \b DROTMG
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0
-*>      (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0)
-*>    H=(          )    (          )    (          )    (          )
-*>      (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0).
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in,out] DD1
-*> \verbatim
-*>          DD1 is DOUBLE PRECISION
-*> \endverbatim
-*> \param[in,out] DD2
-*> \verbatim
-*>          DD2 is DOUBLE PRECISION
-*> \endverbatim
-*> \param[in,out] DX1
-*> \verbatim
-*>          DX1 is DOUBLE PRECISION
-*> \endverbatim
-*> \param[in] DY1
-*> \verbatim
-*>          DY1 is DOUBLE PRECISION
-*> \endverbatim
-*> \param[in,out] DPARAM
-*> \verbatim
-*>          DPARAM is DOUBLE PRECISION array, dimension 5
-*>     DPARAM(1)=DFLAG
-*>     DPARAM(2)=DH11
-*>     DPARAM(3)=DH21
-*>     DPARAM(4)=DH12
-*>     DPARAM(5)=DH22
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-     $                 DU,GAM,GAMSQ,ONE,RGAMSQ,TWO,ZERO
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Data statements ..
-      DATA ZERO,ONE,TWO/0.D0,1.D0,2.D0/
-      DATA GAM,GAMSQ,RGAMSQ/4096.D0,16777216.D0,5.9604645D-8/
-*     ..
-      IF (DD1.LT.ZERO) THEN
-*        GO ZERO-H-D-AND-DX1..
-         DFLAG = -ONE
-         DH11 = ZERO
-         DH12 = ZERO
-         DH21 = ZERO
-         DH22 = ZERO
-         DD1 = ZERO
-         DD2 = ZERO
-         DX1 = ZERO
-      ELSE
-         DP2 = DD2*DY1
-         IF (DP2.EQ.ZERO) THEN
-            DFLAG = -TWO
-            DPARAM(1) = DFLAG
-            RETURN
-         END IF
-*        REGULAR-CASE..
-         DP1 = DD1*DX1
-         DQ2 = DP2*DY1
-         DQ1 = DP1*DX1
-         IF (DABS(DQ1).GT.DABS(DQ2)) THEN
-            DH21 = -DY1/DX1
-            DH12 = DP2/DP1
-            DU = ONE - DH12*DH21
-           IF (DU.GT.ZERO) THEN
-             DFLAG = ZERO
-             DD1 = DD1/DU
-             DD2 = DD2/DU
-             DX1 = DX1*DU
-           END IF
-         ELSE
-            IF (DQ2.LT.ZERO) THEN
-*              GO ZERO-H-D-AND-DX1..
-               DFLAG = -ONE
-               DH11 = ZERO
-               DH12 = ZERO
-               DH21 = ZERO
-               DH22 = ZERO
-               DD1 = ZERO
-               DD2 = ZERO
-               DX1 = ZERO
-            ELSE
-               DFLAG = ONE
-               DH11 = DP1/DP2
-               DH22 = DX1/DY1
-               DU = ONE + DH11*DH22
-               DTEMP = DD2/DU
-               DD2 = DD1/DU
-               DD1 = DTEMP
-               DX1 = DY1*DU
-            END IF
-         END IF
-         IF (DD1.NE.ZERO) THEN
-            DO WHILE ((DD1.LE.RGAMSQ) .OR. (DD1.GE.GAMSQ))
-               IF (DFLAG.EQ.ZERO) THEN
-                  DH11 = ONE
-                  DH22 = ONE
-                  DFLAG = -ONE
-               ELSE
-                  DH21 = -ONE
-                  DH12 = ONE
-                  DFLAG = -ONE
-               END IF
-               IF (DD1.LE.RGAMSQ) THEN
-                  DD1 = DD1*GAM**2
-                  DX1 = DX1/GAM
-                  DH11 = DH11/GAM
-                  DH12 = DH12/GAM
-               ELSE
-                  DD1 = DD1/GAM**2
-                  DX1 = DX1*GAM
-                  DH11 = DH11*GAM
-                  DH12 = DH12*GAM
-               END IF
-            ENDDO
-         END IF
-         IF (DD2.NE.ZERO) THEN
-            DO WHILE ( (DABS(DD2).LE.RGAMSQ) .OR. (DABS(DD2).GE.GAMSQ) )
-               IF (DFLAG.EQ.ZERO) THEN
-                  DH11 = ONE
-                  DH22 = ONE
-                  DFLAG = -ONE
-               ELSE
-                  DH21 = -ONE
-                  DH12 = ONE
-                  DFLAG = -ONE
-               END IF
-               IF (DABS(DD2).LE.RGAMSQ) THEN
-                  DD2 = DD2*GAM**2
-                  DH21 = DH21/GAM
-                  DH22 = DH22/GAM
-               ELSE
-                  DD2 = DD2/GAM**2
-                  DH21 = DH21*GAM
-                  DH22 = DH22*GAM
-               END IF
-            END DO
-         END IF
-      END IF
-         DPARAM(2) = DH11
-         DPARAM(3) = DH21
-         DPARAM(4) = DH12
-         DPARAM(5) = DH22
-         DPARAM(3) = DH21
-         DPARAM(4) = DH12
-      ELSE
-         DPARAM(2) = DH11
-         DPARAM(5) = DH22
-      END IF
-      DPARAM(1) = DFLAG
-      RETURN
-      END
diff --git a/superlu/BLAS/dsbmv.f b/superlu/BLAS/dsbmv.f
deleted file mode 100644
index aea12134..00000000
--- a/superlu/BLAS/dsbmv.f
+++ /dev/null
@@ -1,375 +0,0 @@
-*> \brief \b DSBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSBMV  performs the matrix-vector  operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric band matrix, with k super-diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the band matrix A is being supplied as
-*>           follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  being supplied.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  being supplied.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry, K specifies the number of super-diagonals of the
-*>           matrix A. K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the symmetric matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer the upper
-*>           triangular part of a symmetric band matrix from conventional
-*>           full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the symmetric matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer the lower
-*>           triangular part of a symmetric band matrix from conventional
-*>           full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (K.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 6
-          INFO = 8
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of the array A
-*     are accessed sequentially with one pass through A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when upper triangle of A is stored.
-          KPLUS1 = K + 1
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  L = KPLUS1 - J
-                  DO 50 I = MAX(1,J-K),J - 1
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  L = KPLUS1 - J
-                  DO 70 I = MAX(1,J-K),J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  IF (J.GT.K) THEN
-                      KX = KX + INCX
-                      KY = KY + INCY
-                  END IF
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when lower triangle of A is stored.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*A(1,J)
-                  L = 1 - J
-                  DO 90 I = J + 1,MIN(N,J+K)
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*A(1,J)
-                  L = 1 - J
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,MIN(N,J+K)
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DSBMV .
-      END
diff --git a/superlu/BLAS/dscal.f b/superlu/BLAS/dscal.f
deleted file mode 100644
index 8bbfec6f..00000000
--- a/superlu/BLAS/dscal.f
+++ /dev/null
@@ -1,110 +0,0 @@
-*> \brief \b DSCAL
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    DSCAL scales a vector by a constant.
-*>    uses unrolled loops for increment equal to one.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-*        clean-up loop
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DX(I) = DA*DX(I)
-            END DO
-            IF (N.LT.5) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-            DX(I) = DA*DX(I)
-            DX(I+1) = DA*DX(I+1)
-            DX(I+2) = DA*DX(I+2)
-            DX(I+3) = DA*DX(I+3)
-            DX(I+4) = DA*DX(I+4)
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            DX(I) = DA*DX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/dsdot.f b/superlu/BLAS/dsdot.f
deleted file mode 100644
index f9cb4980..00000000
--- a/superlu/BLAS/dsdot.f
+++ /dev/null
@@ -1,172 +0,0 @@
-*> \brief \b DSDOT
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*    =======
-*    Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*    Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> Compute the inner product of two vectors with extended
-*> precision accumulation and result.
-*> Returns D.P. dot product accumulated in D.P., for S.P. SX and SY
-*> DSDOT = sum for I = 0 to N-1 of  SX(LX+I*INCX) * SY(LY+I*INCY),
-*> where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
-*> defined in a similar way using INCY.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*> \param[in] SX
-*> \verbatim
-*>          SX is REAL array, dimension(N)
-*>         single precision vector with N elements
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>          storage spacing between elements of SX
-*> \endverbatim
-*> \param[in] SY
-*> \verbatim
-*>          SY is REAL array, dimension(N)
-*>         single precision vector with N elements
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>         storage spacing between elements of SY
-*> \endverbatim
-*> \result DSDOT
-*> \verbatim
-*>         DSDOT  double precision dot product (zero if N.LE.0)
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*> \endverbatim
-*> \par References:
-*  ================
-*> \verbatim
-*>  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
-*>  Krogh, Basic linear algebra subprograms for Fortran
-*>  usage, Algorithm No. 539, Transactions on Mathematical
-*>  Software 5, 3 (September 1979), pp. 308-323.
-*>  791001  DATE WRITTEN
-*>  890831  Modified array declarations.  (WRB)
-*>  890831  REVISION DATE from Version 3.2
-*>  891214  Prologue converted to Version 4.0 format.  (BAB)
-*>  920310  Corrected definition of LX in DESCRIPTION.  (WRB)
-*>  920501  Reformatted the REFERENCES section.  (WRB)
-*>  070118  Reformat to LAPACK style (JL)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*  Authors:
-*  ========
-*  Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*  Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      DSDOT = 0.0D0
-      IF (N.LE.0) RETURN
-*     Code for equal, positive, non-unit increments.
-         NS = N*INCX
-         DO I = 1,NS,INCX
-            DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
-         END DO
-      ELSE
-*     Code for unequal or nonpositive increments.
-         KX = 1
-         KY = 1
-         IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-         IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-         DO I = 1,N
-            DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
-            KX = KX + INCX
-            KY = KY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/dspmv.f b/superlu/BLAS/dspmv.f
deleted file mode 100644
index 72a28fed..00000000
--- a/superlu/BLAS/dspmv.f
+++ /dev/null
@@ -1,331 +0,0 @@
-*> \brief \b DSPMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSPMV  performs the matrix-vector operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION AP(*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 6
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when AP contains the upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  K = KK
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(I)
-                      K = K + 1
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
-                  KK = KK + J
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 K = KK,KK + J - 2
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when AP contains the lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*AP(KK)
-                  K = KK + 1
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(I)
-                      K = K + 1
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-                  KK = KK + (N-J+1)
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*AP(KK)
-                  IX = JX
-                  IY = JY
-                  DO 110 K = KK + 1,KK + N - J
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + (N-J+1)
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DSPMV .
-      END
diff --git a/superlu/BLAS/dspr.f b/superlu/BLAS/dspr.f
deleted file mode 100644
index e89f87d4..00000000
--- a/superlu/BLAS/dspr.f
+++ /dev/null
@@ -1,261 +0,0 @@
-*> \brief \b DSPR
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSPR    performs the symmetric rank 1 operation
-*>    A := alpha*x*x**T + A,
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSPR  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set the start point in X if the increment is not unity.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when upper triangle is stored in AP.
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      K = KK
-                      DO 10 I = 1,J
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   10                 CONTINUE
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = KX
-                      DO 30 K = KK,KK + J - 1
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when lower triangle is stored in AP.
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      K = KK
-                      DO 50 I = J,N
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   50                 CONTINUE
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = JX
-                      DO 70 K = KK,KK + N - J
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DSPR  .
-      END
diff --git a/superlu/BLAS/dspr2.f b/superlu/BLAS/dspr2.f
deleted file mode 100644
index 4cd416f5..00000000
--- a/superlu/BLAS/dspr2.f
+++ /dev/null
@@ -1,296 +0,0 @@
-*> \brief \b DSPR2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSPR2  performs the symmetric rank 2 operation
-*>    A := alpha*x*y**T + alpha*y*x**T + A,
-*> where alpha is a scalar, x and y are n element vectors and A is an
-*> n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION AP(*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSPR2 ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when upper triangle is stored in AP.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      K = KK
-                      DO 10 I = 1,J
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   10                 CONTINUE
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = KX
-                      IY = KY
-                      DO 30 K = KK,KK + J - 1
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when lower triangle is stored in AP.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      K = KK
-                      DO 50 I = J,N
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   50                 CONTINUE
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = JX
-                      IY = JY
-                      DO 70 K = KK,KK + N - J
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DSPR2 .
-      END
diff --git a/superlu/BLAS/dswap.f b/superlu/BLAS/dswap.f
deleted file mode 100644
index 5bd8f7d2..00000000
--- a/superlu/BLAS/dswap.f
+++ /dev/null
@@ -1,122 +0,0 @@
-*> \brief \b DSWAP
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    interchanges two vectors.
-*>    uses unrolled loops for increments equal one.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*       code for both increments equal to 1
-*       clean-up loop
-         M = MOD(N,3)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DTEMP = DX(I)
-               DX(I) = DY(I)
-               DY(I) = DTEMP
-            END DO
-            IF (N.LT.3) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,3
-            DTEMP = DX(I)
-            DX(I) = DY(I)
-            DY(I) = DTEMP
-            DTEMP = DX(I+1)
-            DX(I+1) = DY(I+1)
-            DY(I+1) = DTEMP
-            DTEMP = DX(I+2)
-            DX(I+2) = DY(I+2)
-            DY(I+2) = DTEMP
-         END DO
-      ELSE
-*       code for unequal increments or equal increments not equal
-*         to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DTEMP = DX(IX)
-            DX(IX) = DY(IY)
-            DY(IY) = DTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/dsymm.f b/superlu/BLAS/dsymm.f
deleted file mode 100644
index 77c797ea..00000000
--- a/superlu/BLAS/dsymm.f
+++ /dev/null
@@ -1,367 +0,0 @@
-*> \brief \b DSYMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSYMM  performs one of the matrix-matrix operations
-*>    C := alpha*A*B + beta*C,
-*> or
-*>    C := alpha*B*A + beta*C,
-*> where alpha and beta are scalars,  A is a symmetric matrix and  B and
-*> C are  m by n matrices.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  symmetric matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  symmetric  matrix   A  is  to  be
-*>           referenced as follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is  n otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     Set NROWA as the number of rows of A.
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     And when
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSAME(SIDE,'L')) THEN
-*        Form  C := alpha*A*B + beta*C.
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*B*A + beta*C.
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*A(J,J)
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*A(J,K)
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(J,K)
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-      RETURN
-*     End of DSYMM .
-      END
diff --git a/superlu/BLAS/dsymv.f b/superlu/BLAS/dsymv.f
deleted file mode 100644
index af2dfd2a..00000000
--- a/superlu/BLAS/dsymv.f
+++ /dev/null
@@ -1,333 +0,0 @@
-*> \brief \b DSYMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSYMV  performs the matrix-vector  operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when A is stored in upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*A(J,J) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 I = 1,J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*A(J,J) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when A is stored in lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*A(J,J)
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*A(J,J)
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,N
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DSYMV .
-      END
diff --git a/superlu/BLAS/dsyr.f b/superlu/BLAS/dsyr.f
deleted file mode 100644
index c998ee82..00000000
--- a/superlu/BLAS/dsyr.f
+++ /dev/null
@@ -1,263 +0,0 @@
-*> \brief \b DSYR
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSYR   performs the symmetric rank 1 operation
-*>    A := alpha*x*x**T + A,
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n symmetric matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSYR  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set the start point in X if the increment is not unity.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when A is stored in upper triangle.
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      DO 10 I = 1,J
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   10                 CONTINUE
-                  END IF
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = KX
-                      DO 30 I = 1,J
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when A is stored in lower triangle.
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      DO 50 I = J,N
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   50                 CONTINUE
-                  END IF
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = JX
-                      DO 70 I = J,N
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DSYR  .
-      END
diff --git a/superlu/BLAS/dsyr2.f b/superlu/BLAS/dsyr2.f
deleted file mode 100644
index 8bfa5fe0..00000000
--- a/superlu/BLAS/dsyr2.f
+++ /dev/null
@@ -1,298 +0,0 @@
-*> \brief \b DSYR2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSYR2  performs the symmetric rank 2 operation
-*>    A := alpha*x*y**T + alpha*y*x**T + A,
-*> where alpha is a scalar, x and y are n element vectors and A is an n
-*> by n symmetric matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSYR2 ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when A is stored in the upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      DO 10 I = 1,J
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   10                 CONTINUE
-                  END IF
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = KX
-                      IY = KY
-                      DO 30 I = 1,J
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when A is stored in the lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      DO 50 I = J,N
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   50                 CONTINUE
-                  END IF
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = JX
-                      IY = JY
-                      DO 70 I = J,N
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DSYR2 .
-      END
diff --git a/superlu/BLAS/dsyr2k.f b/superlu/BLAS/dsyr2k.f
deleted file mode 100644
index 6dd7ca29..00000000
--- a/superlu/BLAS/dsyr2k.f
+++ /dev/null
@@ -1,399 +0,0 @@
-*> \brief \b DSYR2K
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSYR2K  performs one of the symmetric rank 2k operations
-*>    C := alpha*A*B**T + alpha*B*A**T + beta*C,
-*> or
-*>    C := alpha*A**T*B + alpha*B**T*A + beta*C,
-*> where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
-*> and  A and B  are  n by k  matrices  in the  first  case  and  k by n
-*> matrices in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   C := alpha*A*B**T + alpha*B*A**T +
-*>                                        beta*C.
-*>              TRANS = 'T' or 't'   C := alpha*A**T*B + alpha*B**T*A +
-*>                                        beta*C.
-*>              TRANS = 'C' or 'c'   C := alpha*A**T*B + alpha*B**T*A +
-*>                                        beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
-*>           of rows of the matrices  A and B.  K must be at least  zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'T')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*B**T + alpha*B*A**T + C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**T*B + alpha*B**T*A + C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DSYR2K.
-      END
diff --git a/superlu/BLAS/dsyrk.f b/superlu/BLAS/dsyrk.f
deleted file mode 100644
index bd70dfba..00000000
--- a/superlu/BLAS/dsyrk.f
+++ /dev/null
@@ -1,364 +0,0 @@
-*> \brief \b DSYRK
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DSYRK  performs one of the symmetric rank k operations
-*>    C := alpha*A*A**T + beta*C,
-*> or
-*>    C := alpha*A**T*A + beta*C,
-*> where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
-*> and  A  is an  n by k  matrix in the first case and a  k by n  matrix
-*> in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**T + beta*C.
-*>              TRANS = 'T' or 't'   C := alpha*A**T*A + beta*C.
-*>              TRANS = 'C' or 'c'   C := alpha*A**T*A + beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
-*>           of rows of the matrix  A.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'T')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*A**T + beta*C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**T*A + beta*C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP = ZERO
-                      DO 220 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of DSYRK .
-      END
diff --git a/superlu/BLAS/dtbmv.f b/superlu/BLAS/dtbmv.f
deleted file mode 100644
index 20dd83ea..00000000
--- a/superlu/BLAS/dtbmv.f
+++ /dev/null
@@ -1,398 +0,0 @@
-*> \brief \b DTBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DTBMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX   too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*         Form  x := A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = KPLUS1 - J
-                          DO 10 I = MAX(1,J-K),J - 1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = KPLUS1 - J
-                          DO 30 I = MAX(1,J-K),J - 1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
-                      END IF
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = 1 - J
-                          DO 50 I = MIN(N,J+K),J + 1,-1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(1,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = 1 - J
-                          DO 70 I = MIN(N,J+K),J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(1,J)
-                      END IF
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                      DO 90 I = J - 1,MAX(1,J-K),-1
-                          TEMP = TEMP + A(L+I,J)*X(I)
-   90                 CONTINUE
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      KX = KX - INCX
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                      DO 110 I = J - 1,MAX(1,J-K),-1
-                          TEMP = TEMP + A(L+I,J)*X(IX)
-                          IX = IX - INCX
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(1,J)
-                      DO 130 I = J + 1,MIN(N,J+K)
-                          TEMP = TEMP + A(L+I,J)*X(I)
-  130                 CONTINUE
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      KX = KX + INCX
-                      IX = KX
-                      L = 1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(1,J)
-                      DO 150 I = J + 1,MIN(N,J+K)
-                          TEMP = TEMP + A(L+I,J)*X(IX)
-                          IX = IX + INCX
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of DTBMV .
-      END
diff --git a/superlu/BLAS/dtbsv.f b/superlu/BLAS/dtbsv.f
deleted file mode 100644
index ad468288..00000000
--- a/superlu/BLAS/dtbsv.f
+++ /dev/null
@@ -1,401 +0,0 @@
-*> \brief \b DTBSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DTBSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular band matrix, with ( k + 1 )
-*> diagonals.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed by sequentially with one pass through A.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,MAX(1,J-K),-1
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 40 J = N,1,-1
-                      KX = KX - INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J)
-                          TEMP = X(JX)
-                          DO 30 I = J - 1,MAX(1,J-K),-1
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX - INCX
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          L = 1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(1,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,MIN(N,J+K)
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      KX = KX + INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = 1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(1,J)
-                          TEMP = X(JX)
-                          DO 70 I = J + 1,MIN(N,J+K)
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX + INCX
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T)*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      DO 90 I = MAX(1,J-K),J - 1
-                          TEMP = TEMP - A(L+I,J)*X(I)
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      L = KPLUS1 - J
-                      DO 110 I = MAX(1,J-K),J - 1
-                          TEMP = TEMP - A(L+I,J)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      L = 1 - J
-                      DO 130 I = MIN(N,J+K),J + 1,-1
-                          TEMP = TEMP - A(L+I,J)*X(I)
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      L = 1 - J
-                      DO 150 I = MIN(N,J+K),J + 1,-1
-                          TEMP = TEMP - A(L+I,J)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of DTBSV .
-      END
diff --git a/superlu/BLAS/dtpmv.f b/superlu/BLAS/dtpmv.f
deleted file mode 100644
index 3b0e6209..00000000
--- a/superlu/BLAS/dtpmv.f
+++ /dev/null
@@ -1,352 +0,0 @@
-*> \brief \b DTPMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DTPMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*        Form  x:= A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K + 1
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK+J-1)
-                      END IF
-                      KK = KK + J
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 K = KK,KK + J - 2
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1)
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K - 1
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK-N+J)
-                      END IF
-                      KK = KK - (N-J+1)
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 K = KK,KK - (N- (J+1)),-1
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J)
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      K = KK - 1
-                      DO 90 I = J - 1,1,-1
-                          TEMP = TEMP + AP(K)*X(I)
-                          K = K - 1
-   90                 CONTINUE
-                      X(J) = TEMP
-                      KK = KK - J
-  100             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      DO 110 K = KK - 1,KK - J + 1,-1
-                          IX = IX - INCX
-                          TEMP = TEMP + AP(K)*X(IX)
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - J
-  120             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      K = KK + 1
-                      DO 130 I = J + 1,N
-                          TEMP = TEMP + AP(K)*X(I)
-                          K = K + 1
-  130                 CONTINUE
-                      X(J) = TEMP
-                      KK = KK + (N-J+1)
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      DO 150 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          TEMP = TEMP + AP(K)*X(IX)
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of DTPMV .
-      END
diff --git a/superlu/BLAS/dtpsv.f b/superlu/BLAS/dtpsv.f
deleted file mode 100644
index a5d9faa4..00000000
--- a/superlu/BLAS/dtpsv.f
+++ /dev/null
@@ -1,354 +0,0 @@
-*> \brief \b DTPSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DTPSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix, supplied in packed form.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK - 1
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K - 1
-   10                     CONTINUE
-                      END IF
-                      KK = KK - J
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK + 1
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K + 1
-   50                     CONTINUE
-                      END IF
-                      KK = KK + (N-J+1)
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      K = KK
-                      DO 90 I = 1,J - 1
-                          TEMP = TEMP - AP(K)*X(I)
-                          K = K + 1
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      X(J) = TEMP
-                      KK = KK + J
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 110 K = KK,KK + J - 2
-                          TEMP = TEMP - AP(K)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + J
-  120             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK
-                      DO 130 I = N,J + 1,-1
-                          TEMP = TEMP - AP(K)*X(I)
-                          K = K - 1
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      X(J) = TEMP
-                      KK = KK - (N-J+1)
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 150 K = KK,KK - (N- (J+1)),-1
-                          TEMP = TEMP - AP(K)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of DTPSV .
-      END
diff --git a/superlu/BLAS/dtrmm.f b/superlu/BLAS/dtrmm.f
deleted file mode 100644
index e315d596..00000000
--- a/superlu/BLAS/dtrmm.f
+++ /dev/null
@@ -1,415 +0,0 @@
-*> \brief \b DTRMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DTRMM  performs one of the matrix-matrix operations
-*>    B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
-*> where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>    op( A ) = A   or   op( A ) = A**T.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE specifies whether  op( A ) multiplies B from
-*>           the left or right as follows:
-*>              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
-*>              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>              TRANSA = 'C' or 'c'   op( A ) = A**T.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>           A is DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
-*>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*> \param[in,out] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain the matrix  B,  and  on exit  is overwritten  by the
-*>           transformed matrix.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     Test the input parameters.
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-          INFO = 2
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-          INFO = 9
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*     And when
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*A*B.
-              IF (UPPER) THEN
-                  DO 50 J = 1,N
-                      DO 40 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              DO 30 I = 1,K - 1
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   30                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP*A(K,K)
-                              B(K,J) = TEMP
-                          END IF
-   40                 CONTINUE
-   50             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              B(K,J) = TEMP
-                              IF (NOUNIT) B(K,J) = B(K,J)*A(K,K)
-                              DO 60 I = K + 1,M
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   60                         CONTINUE
-                          END IF
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*A**T*B.
-              IF (UPPER) THEN
-                  DO 110 J = 1,N
-                      DO 100 I = M,1,-1
-                          TEMP = B(I,J)
-                          IF (NOUNIT) TEMP = TEMP*A(I,I)
-                          DO 90 K = 1,I - 1
-                              TEMP = TEMP + A(K,I)*B(K,J)
-   90                     CONTINUE
-                          B(I,J) = ALPHA*TEMP
-  100                 CONTINUE
-  110             CONTINUE
-              ELSE
-                  DO 140 J = 1,N
-                      DO 130 I = 1,M
-                          TEMP = B(I,J)
-                          IF (NOUNIT) TEMP = TEMP*A(I,I)
-                          DO 120 K = I + 1,M
-                              TEMP = TEMP + A(K,I)*B(K,J)
-  120                     CONTINUE
-                          B(I,J) = ALPHA*TEMP
-  130                 CONTINUE
-  140             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*B*A.
-              IF (UPPER) THEN
-                  DO 180 J = N,1,-1
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 150 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  150                 CONTINUE
-                      DO 170 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 160 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  160                         CONTINUE
-                          END IF
-  170                 CONTINUE
-  180             CONTINUE
-              ELSE
-                  DO 220 J = 1,N
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 190 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  190                 CONTINUE
-                      DO 210 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 200 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  200                         CONTINUE
-                          END IF
-  210                 CONTINUE
-  220             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*B*A**T.
-              IF (UPPER) THEN
-                  DO 260 K = 1,N
-                      DO 240 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = ALPHA*A(J,K)
-                              DO 230 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  230                         CONTINUE
-                          END IF
-  240                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(K,K)
-                      IF (TEMP.NE.ONE) THEN
-                          DO 250 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  250                     CONTINUE
-                      END IF
-  260             CONTINUE
-              ELSE
-                  DO 300 K = N,1,-1
-                      DO 280 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = ALPHA*A(J,K)
-                              DO 270 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  270                         CONTINUE
-                          END IF
-  280                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(K,K)
-                      IF (TEMP.NE.ONE) THEN
-                          DO 290 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  290                     CONTINUE
-                      END IF
-  300             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of DTRMM .
-      END
diff --git a/superlu/BLAS/dtrmv.f b/superlu/BLAS/dtrmv.f
deleted file mode 100644
index 83959d06..00000000
--- a/superlu/BLAS/dtrmv.f
+++ /dev/null
@@ -1,342 +0,0 @@
-*> \brief \b DTRMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DTRMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 6
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*        Form  x := A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*A(I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 I = 1,J - 1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*A(I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 I = N,J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 90 I = J - 1,1,-1
-                          TEMP = TEMP + A(I,J)*X(I)
-   90                 CONTINUE
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 110 I = J - 1,1,-1
-                          IX = IX - INCX
-                          TEMP = TEMP + A(I,J)*X(IX)
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 130 I = J + 1,N
-                          TEMP = TEMP + A(I,J)*X(I)
-  130                 CONTINUE
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 150 I = J + 1,N
-                          IX = IX + INCX
-                          TEMP = TEMP + A(I,J)*X(IX)
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of DTRMV .
-      END
diff --git a/superlu/BLAS/dtrsm.f b/superlu/BLAS/dtrsm.f
deleted file mode 100644
index bc440f06..00000000
--- a/superlu/BLAS/dtrsm.f
+++ /dev/null
@@ -1,443 +0,0 @@
-*> \brief \b DTRSM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DTRSM  solves one of the matrix equations
-*>    op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
-*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>    op( A ) = A   or   op( A ) = A**T.
-*> The matrix X is overwritten on B.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry, SIDE specifies whether op( A ) appears on the left
-*>           or right of X as follows:
-*>              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
-*>              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>              TRANSA = 'C' or 'c'   op( A ) = A**T.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, k ),
-*>           where k is m when SIDE = 'L' or 'l'
-*>             and k is n when SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*> \param[in,out] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain  the  right-hand  side  matrix  B,  and  on exit  is
-*>           overwritten by the solution matrix  X.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     Test the input parameters.
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-          INFO = 2
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-          INFO = 9
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*     And when
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*inv( A )*B.
-              IF (UPPER) THEN
-                  DO 60 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 30 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   30                     CONTINUE
-                      END IF
-                      DO 50 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 40 I = 1,K - 1
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   40                         CONTINUE
-                          END IF
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 100 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 70 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   70                     CONTINUE
-                      END IF
-                      DO 90 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 80 I = K + 1,M
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   80                         CONTINUE
-                          END IF
-   90                 CONTINUE
-  100             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*inv( A**T )*B.
-              IF (UPPER) THEN
-                  DO 130 J = 1,N
-                      DO 120 I = 1,M
-                          TEMP = ALPHA*B(I,J)
-                          DO 110 K = 1,I - 1
-                              TEMP = TEMP - A(K,I)*B(K,J)
-  110                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          B(I,J) = TEMP
-  120                 CONTINUE
-  130             CONTINUE
-              ELSE
-                  DO 160 J = 1,N
-                      DO 150 I = M,1,-1
-                          TEMP = ALPHA*B(I,J)
-                          DO 140 K = I + 1,M
-                              TEMP = TEMP - A(K,I)*B(K,J)
-  140                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          B(I,J) = TEMP
-  150                 CONTINUE
-  160             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*B*inv( A ).
-              IF (UPPER) THEN
-                  DO 210 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 170 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  170                     CONTINUE
-                      END IF
-                      DO 190 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 180 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  180                         CONTINUE
-                          END IF
-  190                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 200 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  200                     CONTINUE
-                      END IF
-  210             CONTINUE
-              ELSE
-                  DO 260 J = N,1,-1
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 220 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  220                     CONTINUE
-                      END IF
-                      DO 240 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 230 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  230                         CONTINUE
-                          END IF
-  240                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 250 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  250                     CONTINUE
-                      END IF
-  260             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*B*inv( A**T ).
-              IF (UPPER) THEN
-                  DO 310 K = N,1,-1
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(K,K)
-                          DO 270 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  270                     CONTINUE
-                      END IF
-                      DO 290 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = A(J,K)
-                              DO 280 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  280                         CONTINUE
-                          END IF
-  290                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 300 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  300                     CONTINUE
-                      END IF
-  310             CONTINUE
-              ELSE
-                  DO 360 K = 1,N
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(K,K)
-                          DO 320 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  320                     CONTINUE
-                      END IF
-                      DO 340 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = A(J,K)
-                              DO 330 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  330                         CONTINUE
-                          END IF
-  340                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 350 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  350                     CONTINUE
-                      END IF
-  360             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of DTRSM .
-      END
diff --git a/superlu/BLAS/dtrsv.f b/superlu/BLAS/dtrsv.f
deleted file mode 100644
index cab3fd98..00000000
--- a/superlu/BLAS/dtrsv.f
+++ /dev/null
@@ -1,338 +0,0 @@
-*> \brief \b DTRSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DTRSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 6
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*A(I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 I = J - 1,1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*A(I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 I = J + 1,N
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      DO 90 I = 1,J - 1
-                          TEMP = TEMP - A(I,J)*X(I)
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 110 I = 1,J - 1
-                          TEMP = TEMP - A(I,J)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      DO 130 I = N,J + 1,-1
-                          TEMP = TEMP - A(I,J)*X(I)
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 150 I = N,J + 1,-1
-                          TEMP = TEMP - A(I,J)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of DTRSV .
-      END
diff --git a/superlu/BLAS/dzasum.f b/superlu/BLAS/dzasum.f
deleted file mode 100644
index 9f0d3fd0..00000000
--- a/superlu/BLAS/dzasum.f
+++ /dev/null
@@ -1,98 +0,0 @@
-*> \brief \b DZASUM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    DZASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
-*>    returns a single precision result.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-      DZASUM = 0.0d0
-      STEMP = 0.0d0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         DO I = 1,N
-            STEMP = STEMP + DCABS1(ZX(I))
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            STEMP = STEMP + DCABS1(ZX(I))
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/dznrm2.f b/superlu/BLAS/dznrm2.f
deleted file mode 100644
index 3b6bf613..00000000
--- a/superlu/BLAS/dznrm2.f
+++ /dev/null
@@ -1,119 +0,0 @@
-*> \brief \b DZNRM2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> DZNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>    DZNRM2 := sqrt( x**H*x )
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup double_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to ZLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (DBLE(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(DBLE(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-              IF (DIMAG(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(DIMAG(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-      DZNRM2 = NORM
-      RETURN
-*     End of DZNRM2.
-      END
diff --git a/superlu/BLAS/icamax.f b/superlu/BLAS/icamax.f
deleted file mode 100644
index 37035c7a..00000000
--- a/superlu/BLAS/icamax.f
+++ /dev/null
@@ -1,107 +0,0 @@
-*> \brief \b ICAMAX
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    ICAMAX finds the index of the first element having maximum |Re(.)| + 
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup aux_blas
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-      REAL SMAX
-*     ..
-*     .. External Functions ..
-      REAL SCABS1
-*     ..
-      ICAMAX = 0
-      IF (N.LT.1 .OR. INCX.LE.0) RETURN
-      ICAMAX = 1
-      IF (N.EQ.1) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         SMAX = SCABS1(CX(1))
-         DO I = 2,N
-            IF (SCABS1(CX(I)).GT.SMAX) THEN
-               ICAMAX = I
-               SMAX = SCABS1(CX(I))
-            END IF
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         IX = 1
-         SMAX = SCABS1(CX(1))
-         IX = IX + INCX
-         DO I = 2,N
-            IF (SCABS1(CX(IX)).GT.SMAX) THEN
-               ICAMAX = I
-               SMAX = SCABS1(CX(IX))
-            END IF
-            IX = IX + INCX
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/idamax.f b/superlu/BLAS/idamax.f
deleted file mode 100644
index 95856602..00000000
--- a/superlu/BLAS/idamax.f
+++ /dev/null
@@ -1,106 +0,0 @@
-*> \brief \b IDAMAX
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    IDAMAX finds the index of the first element having maximum absolute 
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup aux_blas
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IDAMAX = 0
-      IF (N.LT.1 .OR. INCX.LE.0) RETURN
-      IDAMAX = 1
-      IF (N.EQ.1) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         DMAX = DABS(DX(1))
-         DO I = 2,N
-            IF (DABS(DX(I)).GT.DMAX) THEN
-               IDAMAX = I
-               DMAX = DABS(DX(I))
-            END IF
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         IX = 1
-         DMAX = DABS(DX(1))
-         IX = IX + INCX
-         DO I = 2,N
-            IF (DABS(DX(IX)).GT.DMAX) THEN
-               IDAMAX = I
-               DMAX = DABS(DX(IX))
-            END IF
-            IX = IX + INCX
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/isamax.f b/superlu/BLAS/isamax.f
deleted file mode 100644
index e7312235..00000000
--- a/superlu/BLAS/isamax.f
+++ /dev/null
@@ -1,106 +0,0 @@
-*> \brief \b ISAMAX
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    ISAMAX finds the index of the first element having maximum absolute 
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup aux_blas
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-      REAL SMAX
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      ISAMAX = 0
-      IF (N.LT.1 .OR. INCX.LE.0) RETURN
-      ISAMAX = 1
-      IF (N.EQ.1) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         SMAX = ABS(SX(1))
-         DO I = 2,N
-            IF (ABS(SX(I)).GT.SMAX) THEN
-               ISAMAX = I
-               SMAX = ABS(SX(I))
-            END IF
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         IX = 1
-         SMAX = ABS(SX(1))
-         IX = IX + INCX
-         DO I = 2,N
-            IF (ABS(SX(IX)).GT.SMAX) THEN
-               ISAMAX = I
-               SMAX = ABS(SX(IX))
-            END IF
-            IX = IX + INCX
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/izamax.f b/superlu/BLAS/izamax.f
deleted file mode 100644
index 2ee9b664..00000000
--- a/superlu/BLAS/izamax.f
+++ /dev/null
@@ -1,107 +0,0 @@
-*> \brief \b IZAMAX
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    IZAMAX finds the index of the first element having maximum |Re(.)| + 
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup aux_blas
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, 1/15/85.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-      IZAMAX = 0
-      IF (N.LT.1 .OR. INCX.LE.0) RETURN
-      IZAMAX = 1
-      IF (N.EQ.1) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         DMAX = DCABS1(ZX(1))
-         DO I = 2,N
-            IF (DCABS1(ZX(I)).GT.DMAX) THEN
-               IZAMAX = I
-               DMAX = DCABS1(ZX(I))
-            END IF
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         IX = 1
-         DMAX = DCABS1(ZX(1))
-         IX = IX + INCX
-         DO I = 2,N
-            IF (DCABS1(ZX(IX)).GT.DMAX) THEN
-               IZAMAX = I
-               DMAX = DCABS1(ZX(IX))
-            END IF
-            IX = IX + INCX
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/lsame.f b/superlu/BLAS/lsame.f
deleted file mode 100644
index d8194786..00000000
--- a/superlu/BLAS/lsame.f
+++ /dev/null
@@ -1,125 +0,0 @@
-*> \brief \b LSAME
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> LSAME returns .TRUE. if CA is the same letter as CB regardless of
-*> case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] CA
-*> \verbatim
-*>          CA is CHARACTER*1
-*> \endverbatim
-*> \param[in] CB
-*> \verbatim
-*>          CB is CHARACTER*1
-*>          CA and CB specify the single characters to be compared.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup aux_blas
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.1) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-* =====================================================================
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     Test if the characters are equal
-      LSAME = CA .EQ. CB
-*     Now test for equivalence if both characters are alphabetic.
-      ZCODE = ICHAR('Z')
-*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime
-*     machines, on which ICHAR returns a value with bit 8 set.
-*     ICHAR('A') on Prime machines returns 193 which is the same as
-*     ICHAR('A') on an EBCDIC machine.
-      INTA = ICHAR(CA)
-      INTB = ICHAR(CB)
-      IF (ZCODE.EQ.90 .OR. ZCODE.EQ.122) THEN
-*        ASCII is assumed - ZCODE is the ASCII code of either lower or
-*        upper case 'Z'.
-          IF (INTA.GE.97 .AND. INTA.LE.122) INTA = INTA - 32
-          IF (INTB.GE.97 .AND. INTB.LE.122) INTB = INTB - 32
-      ELSE IF (ZCODE.EQ.233 .OR. ZCODE.EQ.169) THEN
-*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
-*        upper case 'Z'.
-          IF (INTA.GE.129 .AND. INTA.LE.137 .OR.
-     +        INTA.GE.145 .AND. INTA.LE.153 .OR.
-     +        INTA.GE.162 .AND. INTA.LE.169) INTA = INTA + 64
-          IF (INTB.GE.129 .AND. INTB.LE.137 .OR.
-     +        INTB.GE.145 .AND. INTB.LE.153 .OR.
-     +        INTB.GE.162 .AND. INTB.LE.169) INTB = INTB + 64
-      ELSE IF (ZCODE.EQ.218 .OR. ZCODE.EQ.250) THEN
-*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code
-*        plus 128 of either lower or upper case 'Z'.
-          IF (INTA.GE.225 .AND. INTA.LE.250) INTA = INTA - 32
-          IF (INTB.GE.225 .AND. INTB.LE.250) INTB = INTB - 32
-      END IF
-*     RETURN
-*     End of LSAME
-      END
diff --git a/superlu/BLAS/sasum.f b/superlu/BLAS/sasum.f
deleted file mode 100644
index a453ff70..00000000
--- a/superlu/BLAS/sasum.f
+++ /dev/null
@@ -1,112 +0,0 @@
-*> \brief \b SASUM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    SASUM takes the sum of the absolute values.
-*>    uses unrolled loops for increment equal to one.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      SASUM = 0.0e0
-      STEMP = 0.0e0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-*        clean-up loop
-         M = MOD(N,6)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               STEMP = STEMP + ABS(SX(I))
-            END DO
-            IF (N.LT.6) THEN
-               SASUM = STEMP
-               RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,6
-            STEMP = STEMP + ABS(SX(I)) + ABS(SX(I+1)) +
-     $              ABS(SX(I+2)) + ABS(SX(I+3)) +
-     $              ABS(SX(I+4)) + ABS(SX(I+5))
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            STEMP = STEMP + ABS(SX(I))
-         END DO
-      END IF
-      SASUM = STEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/saxpy.f b/superlu/BLAS/saxpy.f
deleted file mode 100644
index 610dfe79..00000000
--- a/superlu/BLAS/saxpy.f
+++ /dev/null
@@ -1,115 +0,0 @@
-*> \brief \b SAXPY
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL SA
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    SAXPY constant times a vector plus a vector.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      REAL SA
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (SA.EQ.0.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-*        clean-up loop
-         M = MOD(N,4)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               SY(I) = SY(I) + SA*SX(I)
-            END DO
-         END IF
-         IF (N.LT.4) RETURN
-         MP1 = M + 1
-         DO I = MP1,N,4
-            SY(I) = SY(I) + SA*SX(I)
-            SY(I+1) = SY(I+1) + SA*SX(I+1)
-            SY(I+2) = SY(I+2) + SA*SX(I+2)
-            SY(I+3) = SY(I+3) + SA*SX(I+3)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-          SY(IY) = SY(IY) + SA*SX(IX)
-          IX = IX + INCX
-          IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/scabs1.f b/superlu/BLAS/scabs1.f
deleted file mode 100644
index b68f76f2..00000000
--- a/superlu/BLAS/scabs1.f
+++ /dev/null
@@ -1,57 +0,0 @@
-*> \brief \b SCABS1
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX Z
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SCABS1 computes |Re(.)| + |Im(.)| of a complex number
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      COMPLEX Z
-*     ..
-*  =====================================================================
-*     .. Intrinsic Functions ..
-*     ..
-      SCABS1 = ABS(REAL(Z)) + ABS(AIMAG(Z))
-      RETURN
-      END
diff --git a/superlu/BLAS/scasum.f b/superlu/BLAS/scasum.f
deleted file mode 100644
index 5fc1a56a..00000000
--- a/superlu/BLAS/scasum.f
+++ /dev/null
@@ -1,97 +0,0 @@
-*> \brief \b SCASUM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    SCASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
-*>    returns a single precision result.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      SCASUM = 0.0e0
-      STEMP = 0.0e0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         DO I = 1,N
-            STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I)))
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I)))
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/scnrm2.f b/superlu/BLAS/scnrm2.f
deleted file mode 100644
index 4f1f03a5..00000000
--- a/superlu/BLAS/scnrm2.f
+++ /dev/null
@@ -1,119 +0,0 @@
-*> \brief \b SCNRM2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SCNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>    SCNRM2 := sqrt( x**H*x )
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to CLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (REAL(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(REAL(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-              IF (AIMAG(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(AIMAG(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-      SCNRM2 = NORM
-      RETURN
-*     End of SCNRM2.
-      END
diff --git a/superlu/BLAS/scopy.f b/superlu/BLAS/scopy.f
deleted file mode 100644
index 47557971..00000000
--- a/superlu/BLAS/scopy.f
+++ /dev/null
@@ -1,115 +0,0 @@
-*> \brief \b SCOPY
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    SCOPY copies a vector, x, to a vector, y.
-*>    uses unrolled loops for increments equal to 1.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-*        clean-up loop
-         M = MOD(N,7)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               SY(I) = SX(I)
-            END DO
-            IF (N.LT.7) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,7
-            SY(I) = SX(I)
-            SY(I+1) = SX(I+1)
-            SY(I+2) = SX(I+2)
-            SY(I+3) = SX(I+3)
-            SY(I+4) = SX(I+4)
-            SY(I+5) = SX(I+5)
-            SY(I+6) = SX(I+6)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            SY(IY) = SX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/sdot.f b/superlu/BLAS/sdot.f
deleted file mode 100644
index 5a54ee24..00000000
--- a/superlu/BLAS/sdot.f
+++ /dev/null
@@ -1,117 +0,0 @@
-*> \brief \b SDOT
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    SDOT forms the dot product of two vectors.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      STEMP = 0.0e0
-      SDOT = 0.0e0
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-*        clean-up loop
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               STEMP = STEMP + SX(I)*SY(I)
-            END DO
-            IF (N.LT.5) THEN
-               SDOT=STEMP
-            RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-          STEMP = STEMP + SX(I)*SY(I) + SX(I+1)*SY(I+1) +
-     $            SX(I+2)*SY(I+2) + SX(I+3)*SY(I+3) + SX(I+4)*SY(I+4)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            STEMP = STEMP + SX(IX)*SY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      SDOT = STEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/sdsdot.f b/superlu/BLAS/sdsdot.f
deleted file mode 100644
index 7ee6ad6b..00000000
--- a/superlu/BLAS/sdsdot.f
+++ /dev/null
@@ -1,255 +0,0 @@
-*> \brief \b SDSDOT
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL SB
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*    =======
-*    Compute the inner product of two vectors with extended
-*    precision accumulation.
-*    Returns S.P. result with dot product accumulated in D.P.
-*    SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
-*    where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
-*    defined in a similar way using INCY.
-*    AUTHOR
-*    ======
-*    Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*    Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*    =========
-*    N      (input) INTEGER
-*           number of elements in input vector(s)
-*    SB     (input) REAL
-*           single precision scalar to be added to inner product
-*    SX     (input) REAL array, dimension (N)
-*           single precision vector with N elements
-*    INCX   (input) INTEGER
-*           storage spacing between elements of SX
-*    SY     (input) REAL array, dimension (N)
-*           single precision vector with N elements
-*    INCY   (input) INTEGER
-*           storage spacing between elements of SY
-*    SDSDOT (output) REAL
-*           single precision dot product (SB if N .LE. 0)
-*    Further Details
-*    ===============
-*    C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
-*    Krogh, Basic linear algebra subprograms for Fortran
-*    usage, Algorithm No. 539, Transactions on Mathematical
-*    Software 5, 3 (September 1979), pp. 308-323.
-*    791001  DATE WRITTEN
-*    890531  Changed all specific intrinsics to generic.  (WRB)
-*    890831  Modified array declarations.  (WRB)
-*    890831  REVISION DATE from Version 3.2
-*    891214  Prologue converted to Version 4.0 format.  (BAB)
-*    920310  Corrected definition of LX in DESCRIPTION.  (WRB)
-*    920501  Reformatted the REFERENCES section.  (WRB)
-*    070118  Reformat to LAPACK coding style
-*    =====================================================================
-*       .. Local Scalars ..
-*       ..
-*       .. Intrinsic Functions ..
-*       ..
-*       DSDOT = SB
-*       IF (N.LE.0) THEN
-*          SDSDOT = DSDOT
-*          RETURN
-*       END IF
-*       Code for equal and positive increments.
-*          NS = N*INCX
-*          DO I = 1,NS,INCX
-*             DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
-*          END DO
-*       ELSE
-*       Code for unequal or nonpositive increments.
-*          KX = 1
-*          KY = 1
-*          IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-*          IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-*          DO I = 1,N
-*             DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
-*             KX = KX + INCX
-*             KY = KY + INCY
-*          END DO
-*       END IF
-*       SDSDOT = DSDOT
-*       RETURN
-*       END
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      REAL SB
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*  =======
-*  Compute the inner product of two vectors with extended
-*  precision accumulation.
-*  Returns S.P. result with dot product accumulated in D.P.
-*  SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
-*  where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
-*  defined in a similar way using INCY.
-*  ======
-*  Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*  Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*  =========
-*  N      (input) INTEGER
-*         number of elements in input vector(s)
-*  SB     (input) REAL
-*         single precision scalar to be added to inner product
-*  SX     (input) REAL array, dimension (N)
-*         single precision vector with N elements
-*  INCX   (input) INTEGER
-*         storage spacing between elements of SX
-*  SY     (input) REAL array, dimension (N)
-*         single precision vector with N elements
-*  INCY   (input) INTEGER
-*         storage spacing between elements of SY
-*  SDSDOT (output) REAL
-*         single precision dot product (SB if N .LE. 0)
-*  Further Details
-*  ===============
-*  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
-*  Krogh, Basic linear algebra subprograms for Fortran
-*  usage, Algorithm No. 539, Transactions on Mathematical
-*  Software 5, 3 (September 1979), pp. 308-323.
-*  791001  DATE WRITTEN
-*  890531  Changed all specific intrinsics to generic.  (WRB)
-*  890831  Modified array declarations.  (WRB)
-*  890831  REVISION DATE from Version 3.2
-*  891214  Prologue converted to Version 4.0 format.  (BAB)
-*  920310  Corrected definition of LX in DESCRIPTION.  (WRB)
-*  920501  Reformatted the REFERENCES section.  (WRB)
-*  070118  Reformat to LAPACK coding style
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      DSDOT = SB
-      IF (N.LE.0) THEN
-         SDSDOT = DSDOT
-         RETURN
-      END IF
-*     Code for equal and positive increments.
-         NS = N*INCX
-         DO I = 1,NS,INCX
-            DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
-         END DO
-      ELSE
-*     Code for unequal or nonpositive increments.
-         KX = 1
-         KY = 1
-         IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-         IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-         DO I = 1,N
-            DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
-            KX = KX + INCX
-            KY = KY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/sgbmv.f b/superlu/BLAS/sgbmv.f
deleted file mode 100644
index 92896324..00000000
--- a/superlu/BLAS/sgbmv.f
+++ /dev/null
@@ -1,370 +0,0 @@
-*> \brief \b SGBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SGBMV  performs one of the matrix-vector operations
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n band matrix, with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>           On entry, KL specifies the number of sub-diagonals of the
-*>           matrix A. KL must satisfy  0 .le. KL.
-*> \endverbatim
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>           On entry, KU specifies the number of super-diagonals of the
-*>           matrix A. KU must satisfy  0 .le. KU.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading ( kl + ku + 1 ) by n part of the
-*>           array A must contain the matrix of coefficients, supplied
-*>           column by column, with the leading diagonal of the matrix in
-*>           row ( ku + 1 ) of the array, the first super-diagonal
-*>           starting at position 2 in row ku, the first sub-diagonal
-*>           starting at position 1 in row ( ku + 2 ), and so on.
-*>           Elements in the array A that do not correspond to elements
-*>           in the band matrix (such as the top left ku by ku triangle)
-*>           are not referenced.
-*>           The following program segment will transfer a band matrix
-*>           from conventional full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    K = KU + 1 - J
-*>                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
-*>                       A( K + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( kl + ku + 1 ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (KL.LT.0) THEN
-          INFO = 4
-      ELSE IF (KU.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (KL+KU+1)) THEN
-          INFO = 8
-          INFO = 10
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the band part of A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      KUP1 = KU + 1
-*        Form  y := alpha*A*x + y.
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  K = KUP1 - J
-                  DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(I) = Y(I) + TEMP*A(K+I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  K = KUP1 - J
-                  DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(IY) = Y(IY) + TEMP*A(K+I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-                  IF (J.GT.KU) KY = KY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y := alpha*A**T*x + y.
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 100 J = 1,N
-                  TEMP = ZERO
-                  K = KUP1 - J
-                  DO 90 I = MAX(1,J-KU),MIN(M,J+KL)
-                      TEMP = TEMP + A(K+I,J)*X(I)
-   90             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  100         CONTINUE
-          ELSE
-              DO 120 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  K = KUP1 - J
-                  DO 110 I = MAX(1,J-KU),MIN(M,J+KL)
-                      TEMP = TEMP + A(K+I,J)*X(IX)
-                      IX = IX + INCX
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-                  IF (J.GT.KU) KX = KX + INCX
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SGBMV .
-      END
diff --git a/superlu/BLAS/sgemm.f b/superlu/BLAS/sgemm.f
deleted file mode 100644
index d7bdb9c4..00000000
--- a/superlu/BLAS/sgemm.f
+++ /dev/null
@@ -1,384 +0,0 @@
-*> \brief \b SGEMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SGEMM  performs one of the matrix-matrix operations
-*>    C := alpha*op( A )*op( B ) + beta*C,
-*> where  op( X ) is one of
-*>    op( X ) = X   or   op( X ) = X**T,
-*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
-*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n',  op( A ) = A.
-*>              TRANSA = 'T' or 't',  op( A ) = A**T.
-*>              TRANSA = 'C' or 'c',  op( A ) = A**T.
-*> \endverbatim
-*> \param[in] TRANSB
-*> \verbatim
-*>          TRANSB is CHARACTER*1
-*>           On entry, TRANSB specifies the form of op( B ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSB = 'N' or 'n',  op( B ) = B.
-*>              TRANSB = 'T' or 't',  op( B ) = B**T.
-*>              TRANSB = 'C' or 'c',  op( B ) = B**T.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies  the number  of rows  of the  matrix
-*>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N  specifies the number  of columns of the matrix
-*>           op( B ) and the number of columns of the matrix C. N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry,  K  specifies  the number of columns of the matrix
-*>           op( A ) and the number of rows of the matrix op( B ). K must
-*>           be at least  zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-*>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by m  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, kb ), where kb is
-*>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-*>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  n by k  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
-*>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is REAL array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n  matrix
-*>           ( alpha*op( A )*op( B ) + beta*C ).
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
-*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
-*     and  columns of  A  and the  number of  rows  of  B  respectively.
-      IF (NOTA) THEN
-          NROWA = M
-          NCOLA = K
-      ELSE
-          NROWA = K
-          NCOLA = M
-      END IF
-      IF (NOTB) THEN
-          NROWB = K
-      ELSE
-          NROWB = N
-      END IF
-*     Test the input parameters.
-      INFO = 0
-     +    (.NOT.LSAME(TRANSA,'T'))) THEN
-          INFO = 1
-     +         (.NOT.LSAME(TRANSB,'T'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-          INFO = 8
-          INFO = 10
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And if
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (NOTB) THEN
-          IF (NOTA) THEN
-*           Form  C := alpha*A*B + beta*C.
-              DO 90 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 50 I = 1,M
-                          C(I,J) = ZERO
-   50                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 60 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-   60                 CONTINUE
-                  END IF
-                  DO 80 L = 1,K
-                      TEMP = ALPHA*B(L,J)
-                      DO 70 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                 CONTINUE
-   80             CONTINUE
-   90         CONTINUE
-          ELSE
-*           Form  C := alpha*A**T*B + beta*C
-              DO 120 J = 1,N
-                  DO 110 I = 1,M
-                      TEMP = ZERO
-                      DO 100 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(L,J)
-  100                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  110             CONTINUE
-  120         CONTINUE
-          END IF
-      ELSE
-          IF (NOTA) THEN
-*           Form  C := alpha*A*B**T + beta*C
-              DO 170 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 130 I = 1,M
-                          C(I,J) = ZERO
-  130                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 140 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  140                 CONTINUE
-                  END IF
-                  DO 160 L = 1,K
-                      TEMP = ALPHA*B(J,L)
-                      DO 150 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  150                 CONTINUE
-  160             CONTINUE
-  170         CONTINUE
-          ELSE
-*           Form  C := alpha*A**T*B**T + beta*C
-              DO 200 J = 1,N
-                  DO 190 I = 1,M
-                      TEMP = ZERO
-                      DO 180 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(J,L)
-  180                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  190             CONTINUE
-  200         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SGEMM .
-      END
diff --git a/superlu/BLAS/sgemv.f b/superlu/BLAS/sgemv.f
deleted file mode 100644
index 0dfb1fc0..00000000
--- a/superlu/BLAS/sgemv.f
+++ /dev/null
@@ -1,330 +0,0 @@
-*> \brief \b SGEMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SGEMV  performs one of the matrix-vector operations
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry with BETA non-zero, the incremented array Y
-*>           must contain the vector y. On exit, Y is overwritten by the
-*>           updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-          INFO = 6
-          INFO = 8
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-*        Form  y := alpha*A*x + y.
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  DO 50 I = 1,M
-                      Y(I) = Y(I) + TEMP*A(I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  DO 70 I = 1,M
-                      Y(IY) = Y(IY) + TEMP*A(I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y := alpha*A**T*x + y.
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 100 J = 1,N
-                  TEMP = ZERO
-                  DO 90 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(I)
-   90             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  100         CONTINUE
-          ELSE
-              DO 120 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  DO 110 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(IX)
-                      IX = IX + INCX
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SGEMV .
-      END
diff --git a/superlu/BLAS/sger.f b/superlu/BLAS/sger.f
deleted file mode 100644
index c2a9958f..00000000
--- a/superlu/BLAS/sger.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b SGER
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SGER   performs the rank 1 operation
-*>    A := alpha*x*y**T + A,
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SGER  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-      RETURN
-*     End of SGER  .
-      END
diff --git a/superlu/BLAS/snrm2.f b/superlu/BLAS/snrm2.f
deleted file mode 100644
index 7de03d22..00000000
--- a/superlu/BLAS/snrm2.f
+++ /dev/null
@@ -1,112 +0,0 @@
-*> \brief \b SNRM2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>    SNRM2 := sqrt( x'*x ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to SLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE IF (N.EQ.1) THEN
-          NORM = ABS(X(1))
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (X(IX).NE.ZERO) THEN
-                  ABSXI = ABS(X(IX))
-                  IF (SCALE.LT.ABSXI) THEN
-                      SSQ = ONE + SSQ* (SCALE/ABSXI)**2
-                      SCALE = ABSXI
-                  ELSE
-                      SSQ = SSQ + (ABSXI/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-      SNRM2 = NORM
-      RETURN
-*     End of SNRM2.
-      END
diff --git a/superlu/BLAS/srot.f b/superlu/BLAS/srot.f
deleted file mode 100644
index fa8e2958..00000000
--- a/superlu/BLAS/srot.f
+++ /dev/null
@@ -1,101 +0,0 @@
-*> \brief \b SROT
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL C,S
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    applies a plane rotation.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      REAL C,S
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*       code for both increments equal to 1
-         DO I = 1,N
-            STEMP = C*SX(I) + S*SY(I)
-            SY(I) = C*SY(I) - S*SX(I)
-            SX(I) = STEMP
-         END DO
-      ELSE
-*       code for unequal increments or equal increments not equal
-*         to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            STEMP = C*SX(IX) + S*SY(IY)
-            SY(IY) = C*SY(IY) - S*SX(IX)
-            SX(IX) = STEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/srotg.f b/superlu/BLAS/srotg.f
deleted file mode 100644
index b4484fb3..00000000
--- a/superlu/BLAS/srotg.f
+++ /dev/null
@@ -1,86 +0,0 @@
-*> \brief \b SROTG
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL C,S,SA,SB
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    SROTG construct givens plane rotation.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      REAL C,S,SA,SB
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      ROE = SB
-      IF (ABS(SA).GT.ABS(SB)) ROE = SA
-      SCALE = ABS(SA) + ABS(SB)
-      IF (SCALE.EQ.0.0) THEN
-         C = 1.0
-         S = 0.0
-         R = 0.0
-         Z = 0.0
-      ELSE
-         R = SCALE*SQRT((SA/SCALE)**2+ (SB/SCALE)**2)
-         R = SIGN(1.0,ROE)*R
-         C = SA/R
-         S = SB/R
-         Z = 1.0
-         IF (ABS(SA).GT.ABS(SB)) Z = S
-         IF (ABS(SB).GE.ABS(SA) .AND. C.NE.0.0) Z = 1.0/C
-      END IF
-      SA = R
-      SB = Z
-      RETURN
-      END
diff --git a/superlu/BLAS/srotm.f b/superlu/BLAS/srotm.f
deleted file mode 100644
index c71f7f01..00000000
--- a/superlu/BLAS/srotm.f
+++ /dev/null
@@ -1,203 +0,0 @@
-*> \brief \b SROTM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL SPARAM(5),SX(*),SY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    (SX**T)
-*>    SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE
-*>    SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0
-*>      (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0)
-*>    H=(          )    (          )    (          )    (          )
-*>      (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0).
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*> \param[in,out] SX
-*> \verbatim
-*>          SX is REAL array, dimension N
-*>         double precision vector with N elements
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>         storage spacing between elements of SX
-*> \endverbatim
-*> \param[in,out] SY
-*> \verbatim
-*>          SY is REAL array, dimension N
-*>         double precision vector with N elements
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>         storage spacing between elements of SY
-*> \endverbatim
-*> \param[in,out] SPARAM
-*> \verbatim
-*>          SPARAM is REAL array, dimension 5
-*>     SPARAM(1)=SFLAG
-*>     SPARAM(2)=SH11
-*>     SPARAM(3)=SH21
-*>     SPARAM(4)=SH12
-*>     SPARAM(5)=SH22
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL SPARAM(5),SX(*),SY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Data statements ..
-      DATA ZERO,TWO/0.E0,2.E0/
-*     ..
-      SFLAG = SPARAM(1)
-         NSTEPS = N*INCX
-            SH11 = SPARAM(2)
-            SH12 = SPARAM(4)
-            SH21 = SPARAM(3)
-            SH22 = SPARAM(5)
-            DO I = 1,NSTEPS,INCX
-               W = SX(I)
-               Z = SY(I)
-               SX(I) = W*SH11 + Z*SH12
-               SY(I) = W*SH21 + Z*SH22
-            END DO
-            SH12 = SPARAM(4)
-            SH21 = SPARAM(3)
-            DO I = 1,NSTEPS,INCX
-               W = SX(I)
-               Z = SY(I)
-               SX(I) = W + Z*SH12
-               SY(I) = W*SH21 + Z
-            END DO
-         ELSE
-            SH11 = SPARAM(2)
-            SH22 = SPARAM(5)
-            DO I = 1,NSTEPS,INCX
-               W = SX(I)
-               Z = SY(I)
-               SX(I) = W*SH11 + Z
-               SY(I) = -W + SH22*Z
-            END DO
-         END IF
-      ELSE
-         KX = 1
-         KY = 1
-         IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-         IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-            SH11 = SPARAM(2)
-            SH12 = SPARAM(4)
-            SH21 = SPARAM(3)
-            SH22 = SPARAM(5)
-            DO I = 1,N
-               W = SX(KX)
-               Z = SY(KY)
-               SX(KX) = W*SH11 + Z*SH12
-               SY(KY) = W*SH21 + Z*SH22
-               KX = KX + INCX
-               KY = KY + INCY
-            END DO
-            SH12 = SPARAM(4)
-            SH21 = SPARAM(3)
-            DO I = 1,N
-               W = SX(KX)
-               Z = SY(KY)
-               SX(KX) = W + Z*SH12
-               SY(KY) = W*SH21 + Z
-               KX = KX + INCX
-               KY = KY + INCY
-            END DO
-         ELSE
-             SH11 = SPARAM(2)
-             SH22 = SPARAM(5)
-             DO I = 1,N
-                W = SX(KX)
-                Z = SY(KY)
-                SX(KX) = W*SH11 + Z
-                SY(KY) = -W + SH22*Z
-                KX = KX + INCX
-                KY = KY + INCY
-            END DO
-         END IF
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/srotmg.f b/superlu/BLAS/srotmg.f
deleted file mode 100644
index a5077c06..00000000
--- a/superlu/BLAS/srotmg.f
+++ /dev/null
@@ -1,251 +0,0 @@
-*> \brief \b SROTMG
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL SD1,SD2,SX1,SY1
-*       ..
-*       .. Array Arguments ..
-*       REAL SPARAM(5)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0
-*>      (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0)
-*>    H=(          )    (          )    (          )    (          )
-*>      (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0).
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in,out] SD1
-*> \verbatim
-*>          SD1 is REAL
-*> \endverbatim
-*> \param[in,out] SD2
-*> \verbatim
-*>          SD2 is REAL
-*> \endverbatim
-*> \param[in,out] SX1
-*> \verbatim
-*>          SX1 is REAL
-*> \endverbatim
-*> \param[in] SY1
-*> \verbatim
-*>          SY1 is REAL
-*> \endverbatim
-*> \param[in,out] SPARAM
-*> \verbatim
-*>          SPARAM is REAL array, dimension 5
-*>     SPARAM(1)=SFLAG
-*>     SPARAM(2)=SH11
-*>     SPARAM(3)=SH21
-*>     SPARAM(4)=SH12
-*>     SPARAM(5)=SH22
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      REAL SD1,SD2,SX1,SY1
-*     ..
-*     .. Array Arguments ..
-      REAL SPARAM(5)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-     $     SQ2,STEMP,SU,TWO,ZERO
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Data statements ..
-      DATA ZERO,ONE,TWO/0.E0,1.E0,2.E0/
-      DATA GAM,GAMSQ,RGAMSQ/4096.E0,1.67772E7,5.96046E-8/
-*     ..
-      IF (SD1.LT.ZERO) THEN
-*        GO ZERO-H-D-AND-SX1..
-         SFLAG = -ONE
-         SH11 = ZERO
-         SH12 = ZERO
-         SH21 = ZERO
-         SH22 = ZERO
-         SD1 = ZERO
-         SD2 = ZERO
-         SX1 = ZERO
-      ELSE
-         SP2 = SD2*SY1
-         IF (SP2.EQ.ZERO) THEN
-            SFLAG = -TWO
-            SPARAM(1) = SFLAG
-            RETURN
-         END IF
-*        REGULAR-CASE..
-         SP1 = SD1*SX1
-         SQ2 = SP2*SY1
-         SQ1 = SP1*SX1
-         IF (ABS(SQ1).GT.ABS(SQ2)) THEN
-            SH21 = -SY1/SX1
-            SH12 = SP2/SP1
-            SU = ONE - SH12*SH21
-           IF (SU.GT.ZERO) THEN
-             SFLAG = ZERO
-             SD1 = SD1/SU
-             SD2 = SD2/SU
-             SX1 = SX1*SU
-           END IF
-         ELSE
-            IF (SQ2.LT.ZERO) THEN
-*              GO ZERO-H-D-AND-SX1..
-               SFLAG = -ONE
-               SH11 = ZERO
-               SH12 = ZERO
-               SH21 = ZERO
-               SH22 = ZERO
-               SD1 = ZERO
-               SD2 = ZERO
-               SX1 = ZERO
-            ELSE
-               SFLAG = ONE
-               SH11 = SP1/SP2
-               SH22 = SX1/SY1
-               SU = ONE + SH11*SH22
-               STEMP = SD2/SU
-               SD2 = SD1/SU
-               SD1 = STEMP
-               SX1 = SY1*SU
-            END IF
-         END IF
-         IF (SD1.NE.ZERO) THEN
-            DO WHILE ((SD1.LE.RGAMSQ) .OR. (SD1.GE.GAMSQ))
-               IF (SFLAG.EQ.ZERO) THEN
-                  SH11 = ONE
-                  SH22 = ONE
-                  SFLAG = -ONE
-               ELSE
-                  SH21 = -ONE
-                  SH12 = ONE
-                  SFLAG = -ONE
-               END IF
-               IF (SD1.LE.RGAMSQ) THEN
-                  SD1 = SD1*GAM**2
-                  SX1 = SX1/GAM
-                  SH11 = SH11/GAM
-                  SH12 = SH12/GAM
-               ELSE
-                  SD1 = SD1/GAM**2
-                  SX1 = SX1*GAM
-                  SH11 = SH11*GAM
-                  SH12 = SH12*GAM
-               END IF
-            ENDDO
-         END IF
-         IF (SD2.NE.ZERO) THEN
-            DO WHILE ( (ABS(SD2).LE.RGAMSQ) .OR. (ABS(SD2).GE.GAMSQ) )
-               IF (SFLAG.EQ.ZERO) THEN
-                  SH11 = ONE
-                  SH22 = ONE
-                  SFLAG = -ONE
-               ELSE
-                  SH21 = -ONE
-                  SH12 = ONE
-                  SFLAG = -ONE
-               END IF
-               IF (ABS(SD2).LE.RGAMSQ) THEN
-                  SD2 = SD2*GAM**2
-                  SH21 = SH21/GAM
-                  SH22 = SH22/GAM
-               ELSE
-                  SD2 = SD2/GAM**2
-                  SH21 = SH21*GAM
-                  SH22 = SH22*GAM
-               END IF
-            END DO
-         END IF
-      END IF
-         SPARAM(2) = SH11
-         SPARAM(3) = SH21
-         SPARAM(4) = SH12
-         SPARAM(5) = SH22
-         SPARAM(3) = SH21
-         SPARAM(4) = SH12
-      ELSE
-         SPARAM(2) = SH11
-         SPARAM(5) = SH22
-      END IF
-      SPARAM(1) = SFLAG
-      RETURN
-      END
diff --git a/superlu/BLAS/ssbmv.f b/superlu/BLAS/ssbmv.f
deleted file mode 100644
index b711d8b0..00000000
--- a/superlu/BLAS/ssbmv.f
+++ /dev/null
@@ -1,375 +0,0 @@
-*> \brief \b SSBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSBMV  performs the matrix-vector  operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric band matrix, with k super-diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the band matrix A is being supplied as
-*>           follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  being supplied.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  being supplied.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry, K specifies the number of super-diagonals of the
-*>           matrix A. K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the symmetric matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer the upper
-*>           triangular part of a symmetric band matrix from conventional
-*>           full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the symmetric matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer the lower
-*>           triangular part of a symmetric band matrix from conventional
-*>           full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (K.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 6
-          INFO = 8
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of the array A
-*     are accessed sequentially with one pass through A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when upper triangle of A is stored.
-          KPLUS1 = K + 1
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  L = KPLUS1 - J
-                  DO 50 I = MAX(1,J-K),J - 1
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  L = KPLUS1 - J
-                  DO 70 I = MAX(1,J-K),J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  IF (J.GT.K) THEN
-                      KX = KX + INCX
-                      KY = KY + INCY
-                  END IF
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when lower triangle of A is stored.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*A(1,J)
-                  L = 1 - J
-                  DO 90 I = J + 1,MIN(N,J+K)
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*A(1,J)
-                  L = 1 - J
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,MIN(N,J+K)
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SSBMV .
-      END
diff --git a/superlu/BLAS/sscal.f b/superlu/BLAS/sscal.f
deleted file mode 100644
index 2ffc1a25..00000000
--- a/superlu/BLAS/sscal.f
+++ /dev/null
@@ -1,110 +0,0 @@
-*> \brief \b SSCAL
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL SA
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    scales a vector by a constant.
-*>    uses unrolled loops for increment equal to 1.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      REAL SA
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-*        clean-up loop
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               SX(I) = SA*SX(I)
-            END DO
-            IF (N.LT.5) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-            SX(I) = SA*SX(I)
-            SX(I+1) = SA*SX(I+1)
-            SX(I+2) = SA*SX(I+2)
-            SX(I+3) = SA*SX(I+3)
-            SX(I+4) = SA*SX(I+4)
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            SX(I) = SA*SX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/sspmv.f b/superlu/BLAS/sspmv.f
deleted file mode 100644
index bc8af3d4..00000000
--- a/superlu/BLAS/sspmv.f
+++ /dev/null
@@ -1,331 +0,0 @@
-*> \brief \b SSPMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSPMV  performs the matrix-vector operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 6
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when AP contains the upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  K = KK
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(I)
-                      K = K + 1
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
-                  KK = KK + J
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 K = KK,KK + J - 2
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when AP contains the lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*AP(KK)
-                  K = KK + 1
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(I)
-                      K = K + 1
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-                  KK = KK + (N-J+1)
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*AP(KK)
-                  IX = JX
-                  IY = JY
-                  DO 110 K = KK + 1,KK + N - J
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + (N-J+1)
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SSPMV .
-      END
diff --git a/superlu/BLAS/sspr.f b/superlu/BLAS/sspr.f
deleted file mode 100644
index 52cb7317..00000000
--- a/superlu/BLAS/sspr.f
+++ /dev/null
@@ -1,261 +0,0 @@
-*> \brief \b SSPR
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSPR    performs the symmetric rank 1 operation
-*>    A := alpha*x*x**T + A,
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSPR  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set the start point in X if the increment is not unity.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when upper triangle is stored in AP.
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      K = KK
-                      DO 10 I = 1,J
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   10                 CONTINUE
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = KX
-                      DO 30 K = KK,KK + J - 1
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when lower triangle is stored in AP.
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      K = KK
-                      DO 50 I = J,N
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   50                 CONTINUE
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = JX
-                      DO 70 K = KK,KK + N - J
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SSPR  .
-      END
diff --git a/superlu/BLAS/sspr2.f b/superlu/BLAS/sspr2.f
deleted file mode 100644
index b4c81187..00000000
--- a/superlu/BLAS/sspr2.f
+++ /dev/null
@@ -1,296 +0,0 @@
-*> \brief \b SSPR2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSPR2  performs the symmetric rank 2 operation
-*>    A := alpha*x*y**T + alpha*y*x**T + A,
-*> where alpha is a scalar, x and y are n element vectors and A is an
-*> n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSPR2 ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when upper triangle is stored in AP.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      K = KK
-                      DO 10 I = 1,J
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   10                 CONTINUE
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = KX
-                      IY = KY
-                      DO 30 K = KK,KK + J - 1
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when lower triangle is stored in AP.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      K = KK
-                      DO 50 I = J,N
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   50                 CONTINUE
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = JX
-                      IY = JY
-                      DO 70 K = KK,KK + N - J
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SSPR2 .
-      END
diff --git a/superlu/BLAS/sswap.f b/superlu/BLAS/sswap.f
deleted file mode 100644
index f821a1e7..00000000
--- a/superlu/BLAS/sswap.f
+++ /dev/null
@@ -1,122 +0,0 @@
-*> \brief \b SSWAP
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    interchanges two vectors.
-*>    uses unrolled loops for increments equal to 1.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*       code for both increments equal to 1
-*       clean-up loop
-         M = MOD(N,3)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               STEMP = SX(I)
-               SX(I) = SY(I)
-               SY(I) = STEMP
-            END DO
-            IF (N.LT.3) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,3
-            STEMP = SX(I)
-            SX(I) = SY(I)
-            SY(I) = STEMP
-            STEMP = SX(I+1)
-            SX(I+1) = SY(I+1)
-            SY(I+1) = STEMP
-            STEMP = SX(I+2)
-            SX(I+2) = SY(I+2)
-            SY(I+2) = STEMP
-         END DO
-      ELSE
-*       code for unequal increments or equal increments not equal
-*         to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            STEMP = SX(IX)
-            SX(IX) = SY(IY)
-            SY(IY) = STEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/ssymm.f b/superlu/BLAS/ssymm.f
deleted file mode 100644
index d3a193f7..00000000
--- a/superlu/BLAS/ssymm.f
+++ /dev/null
@@ -1,367 +0,0 @@
-*> \brief \b SSYMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSYMM  performs one of the matrix-matrix operations
-*>    C := alpha*A*B + beta*C,
-*> or
-*>    C := alpha*B*A + beta*C,
-*> where alpha and beta are scalars,  A is a symmetric matrix and  B and
-*> C are  m by n matrices.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  symmetric matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  symmetric  matrix   A  is  to  be
-*>           referenced as follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is  n otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is REAL array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     Set NROWA as the number of rows of A.
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     And when
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSAME(SIDE,'L')) THEN
-*        Form  C := alpha*A*B + beta*C.
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*B*A + beta*C.
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*A(J,J)
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*A(J,K)
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(J,K)
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-      RETURN
-*     End of SSYMM .
-      END
diff --git a/superlu/BLAS/ssymv.f b/superlu/BLAS/ssymv.f
deleted file mode 100644
index a1fa54f1..00000000
--- a/superlu/BLAS/ssymv.f
+++ /dev/null
@@ -1,333 +0,0 @@
-*> \brief \b SSYMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSYMV  performs the matrix-vector  operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when A is stored in upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*A(J,J) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 I = 1,J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*A(J,J) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when A is stored in lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*A(J,J)
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*A(J,J)
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,N
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SSYMV .
-      END
diff --git a/superlu/BLAS/ssyr.f b/superlu/BLAS/ssyr.f
deleted file mode 100644
index 9d73f868..00000000
--- a/superlu/BLAS/ssyr.f
+++ /dev/null
@@ -1,263 +0,0 @@
-*> \brief \b SSYR
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSYR   performs the symmetric rank 1 operation
-*>    A := alpha*x*x**T + A,
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n symmetric matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSYR  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set the start point in X if the increment is not unity.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when A is stored in upper triangle.
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      DO 10 I = 1,J
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   10                 CONTINUE
-                  END IF
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = KX
-                      DO 30 I = 1,J
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when A is stored in lower triangle.
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      DO 50 I = J,N
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   50                 CONTINUE
-                  END IF
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = JX
-                      DO 70 I = J,N
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SSYR  .
-      END
diff --git a/superlu/BLAS/ssyr2.f b/superlu/BLAS/ssyr2.f
deleted file mode 100644
index a2a083ad..00000000
--- a/superlu/BLAS/ssyr2.f
+++ /dev/null
@@ -1,298 +0,0 @@
-*> \brief \b SSYR2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSYR2  performs the symmetric rank 2 operation
-*>    A := alpha*x*y**T + alpha*y*x**T + A,
-*> where alpha is a scalar, x and y are n element vectors and A is an n
-*> by n symmetric matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSYR2 ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when A is stored in the upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      DO 10 I = 1,J
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   10                 CONTINUE
-                  END IF
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = KX
-                      IY = KY
-                      DO 30 I = 1,J
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when A is stored in the lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      DO 50 I = J,N
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   50                 CONTINUE
-                  END IF
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = JX
-                      IY = JY
-                      DO 70 I = J,N
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SSYR2 .
-      END
diff --git a/superlu/BLAS/ssyr2k.f b/superlu/BLAS/ssyr2k.f
deleted file mode 100644
index 4a705f79..00000000
--- a/superlu/BLAS/ssyr2k.f
+++ /dev/null
@@ -1,399 +0,0 @@
-*> \brief \b SSYR2K
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSYR2K  performs one of the symmetric rank 2k operations
-*>    C := alpha*A*B**T + alpha*B*A**T + beta*C,
-*> or
-*>    C := alpha*A**T*B + alpha*B**T*A + beta*C,
-*> where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
-*> and  A and B  are  n by k  matrices  in the  first  case  and  k by n
-*> matrices in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   C := alpha*A*B**T + alpha*B*A**T +
-*>                                        beta*C.
-*>              TRANS = 'T' or 't'   C := alpha*A**T*B + alpha*B**T*A +
-*>                                        beta*C.
-*>              TRANS = 'C' or 'c'   C := alpha*A**T*B + alpha*B**T*A +
-*>                                        beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
-*>           of rows of the matrices  A and B.  K must be at least  zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is REAL array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'T')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*B**T + alpha*B*A**T + C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**T*B + alpha*B**T*A + C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SSYR2K.
-      END
diff --git a/superlu/BLAS/ssyrk.f b/superlu/BLAS/ssyrk.f
deleted file mode 100644
index ecb1e4f1..00000000
--- a/superlu/BLAS/ssyrk.f
+++ /dev/null
@@ -1,364 +0,0 @@
-*> \brief \b SSYRK
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> SSYRK  performs one of the symmetric rank k operations
-*>    C := alpha*A*A**T + beta*C,
-*> or
-*>    C := alpha*A**T*A + beta*C,
-*> where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
-*> and  A  is an  n by k  matrix in the first case and a  k by n  matrix
-*> in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**T + beta*C.
-*>              TRANS = 'T' or 't'   C := alpha*A**T*A + beta*C.
-*>              TRANS = 'C' or 'c'   C := alpha*A**T*A + beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
-*>           of rows of the matrix  A.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is REAL array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'T')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*A**T + beta*C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**T*A + beta*C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP = ZERO
-                      DO 220 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of SSYRK .
-      END
diff --git a/superlu/BLAS/stbmv.f b/superlu/BLAS/stbmv.f
deleted file mode 100644
index 4323864e..00000000
--- a/superlu/BLAS/stbmv.f
+++ /dev/null
@@ -1,398 +0,0 @@
-*> \brief \b STBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> STBMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX   too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*         Form  x := A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = KPLUS1 - J
-                          DO 10 I = MAX(1,J-K),J - 1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = KPLUS1 - J
-                          DO 30 I = MAX(1,J-K),J - 1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
-                      END IF
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = 1 - J
-                          DO 50 I = MIN(N,J+K),J + 1,-1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(1,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = 1 - J
-                          DO 70 I = MIN(N,J+K),J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(1,J)
-                      END IF
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                      DO 90 I = J - 1,MAX(1,J-K),-1
-                          TEMP = TEMP + A(L+I,J)*X(I)
-   90                 CONTINUE
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      KX = KX - INCX
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                      DO 110 I = J - 1,MAX(1,J-K),-1
-                          TEMP = TEMP + A(L+I,J)*X(IX)
-                          IX = IX - INCX
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(1,J)
-                      DO 130 I = J + 1,MIN(N,J+K)
-                          TEMP = TEMP + A(L+I,J)*X(I)
-  130                 CONTINUE
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      KX = KX + INCX
-                      IX = KX
-                      L = 1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(1,J)
-                      DO 150 I = J + 1,MIN(N,J+K)
-                          TEMP = TEMP + A(L+I,J)*X(IX)
-                          IX = IX + INCX
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of STBMV .
-      END
diff --git a/superlu/BLAS/stbsv.f b/superlu/BLAS/stbsv.f
deleted file mode 100644
index 00aaeba6..00000000
--- a/superlu/BLAS/stbsv.f
+++ /dev/null
@@ -1,401 +0,0 @@
-*> \brief \b STBSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> STBSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular band matrix, with ( k + 1 )
-*> diagonals.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed by sequentially with one pass through A.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,MAX(1,J-K),-1
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 40 J = N,1,-1
-                      KX = KX - INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J)
-                          TEMP = X(JX)
-                          DO 30 I = J - 1,MAX(1,J-K),-1
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX - INCX
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          L = 1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(1,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,MIN(N,J+K)
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      KX = KX + INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = 1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(1,J)
-                          TEMP = X(JX)
-                          DO 70 I = J + 1,MIN(N,J+K)
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX + INCX
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T)*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      DO 90 I = MAX(1,J-K),J - 1
-                          TEMP = TEMP - A(L+I,J)*X(I)
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      L = KPLUS1 - J
-                      DO 110 I = MAX(1,J-K),J - 1
-                          TEMP = TEMP - A(L+I,J)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      L = 1 - J
-                      DO 130 I = MIN(N,J+K),J + 1,-1
-                          TEMP = TEMP - A(L+I,J)*X(I)
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      L = 1 - J
-                      DO 150 I = MIN(N,J+K),J + 1,-1
-                          TEMP = TEMP - A(L+I,J)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of STBSV .
-      END
diff --git a/superlu/BLAS/stpmv.f b/superlu/BLAS/stpmv.f
deleted file mode 100644
index 765e7f91..00000000
--- a/superlu/BLAS/stpmv.f
+++ /dev/null
@@ -1,352 +0,0 @@
-*> \brief \b STPMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> STPMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*        Form  x:= A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K + 1
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK+J-1)
-                      END IF
-                      KK = KK + J
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 K = KK,KK + J - 2
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1)
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K - 1
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK-N+J)
-                      END IF
-                      KK = KK - (N-J+1)
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 K = KK,KK - (N- (J+1)),-1
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J)
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      K = KK - 1
-                      DO 90 I = J - 1,1,-1
-                          TEMP = TEMP + AP(K)*X(I)
-                          K = K - 1
-   90                 CONTINUE
-                      X(J) = TEMP
-                      KK = KK - J
-  100             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      DO 110 K = KK - 1,KK - J + 1,-1
-                          IX = IX - INCX
-                          TEMP = TEMP + AP(K)*X(IX)
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - J
-  120             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      K = KK + 1
-                      DO 130 I = J + 1,N
-                          TEMP = TEMP + AP(K)*X(I)
-                          K = K + 1
-  130                 CONTINUE
-                      X(J) = TEMP
-                      KK = KK + (N-J+1)
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      DO 150 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          TEMP = TEMP + AP(K)*X(IX)
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of STPMV .
-      END
diff --git a/superlu/BLAS/stpsv.f b/superlu/BLAS/stpsv.f
deleted file mode 100644
index 5a29776d..00000000
--- a/superlu/BLAS/stpsv.f
+++ /dev/null
@@ -1,354 +0,0 @@
-*> \brief \b STPSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> STPSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix, supplied in packed form.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK - 1
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K - 1
-   10                     CONTINUE
-                      END IF
-                      KK = KK - J
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK + 1
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K + 1
-   50                     CONTINUE
-                      END IF
-                      KK = KK + (N-J+1)
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      K = KK
-                      DO 90 I = 1,J - 1
-                          TEMP = TEMP - AP(K)*X(I)
-                          K = K + 1
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      X(J) = TEMP
-                      KK = KK + J
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 110 K = KK,KK + J - 2
-                          TEMP = TEMP - AP(K)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + J
-  120             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK
-                      DO 130 I = N,J + 1,-1
-                          TEMP = TEMP - AP(K)*X(I)
-                          K = K - 1
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      X(J) = TEMP
-                      KK = KK - (N-J+1)
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 150 K = KK,KK - (N- (J+1)),-1
-                          TEMP = TEMP - AP(K)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of STPSV .
-      END
diff --git a/superlu/BLAS/strmm.f b/superlu/BLAS/strmm.f
deleted file mode 100644
index dd208721..00000000
--- a/superlu/BLAS/strmm.f
+++ /dev/null
@@ -1,415 +0,0 @@
-*> \brief \b STRMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> STRMM  performs one of the matrix-matrix operations
-*>    B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
-*> where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>    op( A ) = A   or   op( A ) = A**T.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE specifies whether  op( A ) multiplies B from
-*>           the left or right as follows:
-*>              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
-*>              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>              TRANSA = 'C' or 'c'   op( A ) = A**T.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, k ), where k is m
-*>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*> \param[in,out] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain the matrix  B,  and  on exit  is overwritten  by the
-*>           transformed matrix.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     Test the input parameters.
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-          INFO = 2
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-          INFO = 9
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*     And when
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*A*B.
-              IF (UPPER) THEN
-                  DO 50 J = 1,N
-                      DO 40 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              DO 30 I = 1,K - 1
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   30                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP*A(K,K)
-                              B(K,J) = TEMP
-                          END IF
-   40                 CONTINUE
-   50             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              B(K,J) = TEMP
-                              IF (NOUNIT) B(K,J) = B(K,J)*A(K,K)
-                              DO 60 I = K + 1,M
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   60                         CONTINUE
-                          END IF
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*A**T*B.
-              IF (UPPER) THEN
-                  DO 110 J = 1,N
-                      DO 100 I = M,1,-1
-                          TEMP = B(I,J)
-                          IF (NOUNIT) TEMP = TEMP*A(I,I)
-                          DO 90 K = 1,I - 1
-                              TEMP = TEMP + A(K,I)*B(K,J)
-   90                     CONTINUE
-                          B(I,J) = ALPHA*TEMP
-  100                 CONTINUE
-  110             CONTINUE
-              ELSE
-                  DO 140 J = 1,N
-                      DO 130 I = 1,M
-                          TEMP = B(I,J)
-                          IF (NOUNIT) TEMP = TEMP*A(I,I)
-                          DO 120 K = I + 1,M
-                              TEMP = TEMP + A(K,I)*B(K,J)
-  120                     CONTINUE
-                          B(I,J) = ALPHA*TEMP
-  130                 CONTINUE
-  140             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*B*A.
-              IF (UPPER) THEN
-                  DO 180 J = N,1,-1
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 150 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  150                 CONTINUE
-                      DO 170 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 160 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  160                         CONTINUE
-                          END IF
-  170                 CONTINUE
-  180             CONTINUE
-              ELSE
-                  DO 220 J = 1,N
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 190 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  190                 CONTINUE
-                      DO 210 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 200 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  200                         CONTINUE
-                          END IF
-  210                 CONTINUE
-  220             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*B*A**T.
-              IF (UPPER) THEN
-                  DO 260 K = 1,N
-                      DO 240 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = ALPHA*A(J,K)
-                              DO 230 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  230                         CONTINUE
-                          END IF
-  240                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(K,K)
-                      IF (TEMP.NE.ONE) THEN
-                          DO 250 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  250                     CONTINUE
-                      END IF
-  260             CONTINUE
-              ELSE
-                  DO 300 K = N,1,-1
-                      DO 280 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = ALPHA*A(J,K)
-                              DO 270 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  270                         CONTINUE
-                          END IF
-  280                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(K,K)
-                      IF (TEMP.NE.ONE) THEN
-                          DO 290 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  290                     CONTINUE
-                      END IF
-  300             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of STRMM .
-      END
diff --git a/superlu/BLAS/strmv.f b/superlu/BLAS/strmv.f
deleted file mode 100644
index ba3d7b6a..00000000
--- a/superlu/BLAS/strmv.f
+++ /dev/null
@@ -1,342 +0,0 @@
-*> \brief \b STRMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> STRMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 6
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*        Form  x := A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*A(I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 I = 1,J - 1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*A(I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 I = N,J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 90 I = J - 1,1,-1
-                          TEMP = TEMP + A(I,J)*X(I)
-   90                 CONTINUE
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 110 I = J - 1,1,-1
-                          IX = IX - INCX
-                          TEMP = TEMP + A(I,J)*X(IX)
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 130 I = J + 1,N
-                          TEMP = TEMP + A(I,J)*X(I)
-  130                 CONTINUE
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 150 I = J + 1,N
-                          IX = IX + INCX
-                          TEMP = TEMP + A(I,J)*X(IX)
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of STRMV .
-      END
diff --git a/superlu/BLAS/strsm.f b/superlu/BLAS/strsm.f
deleted file mode 100644
index f2927fe3..00000000
--- a/superlu/BLAS/strsm.f
+++ /dev/null
@@ -1,443 +0,0 @@
-*> \brief \b STRSM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> STRSM  solves one of the matrix equations
-*>    op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
-*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>    op( A ) = A   or   op( A ) = A**T.
-*> The matrix X is overwritten on B.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry, SIDE specifies whether op( A ) appears on the left
-*>           or right of X as follows:
-*>              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
-*>              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>              TRANSA = 'C' or 'c'   op( A ) = A**T.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, k ),
-*>           where k is m when SIDE = 'L' or 'l'
-*>             and k is n when SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*> \param[in,out] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain  the  right-hand  side  matrix  B,  and  on exit  is
-*>           overwritten by the solution matrix  X.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     Test the input parameters.
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-          INFO = 2
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-          INFO = 9
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*     And when
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*inv( A )*B.
-              IF (UPPER) THEN
-                  DO 60 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 30 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   30                     CONTINUE
-                      END IF
-                      DO 50 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 40 I = 1,K - 1
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   40                         CONTINUE
-                          END IF
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 100 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 70 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   70                     CONTINUE
-                      END IF
-                      DO 90 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 80 I = K + 1,M
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   80                         CONTINUE
-                          END IF
-   90                 CONTINUE
-  100             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*inv( A**T )*B.
-              IF (UPPER) THEN
-                  DO 130 J = 1,N
-                      DO 120 I = 1,M
-                          TEMP = ALPHA*B(I,J)
-                          DO 110 K = 1,I - 1
-                              TEMP = TEMP - A(K,I)*B(K,J)
-  110                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          B(I,J) = TEMP
-  120                 CONTINUE
-  130             CONTINUE
-              ELSE
-                  DO 160 J = 1,N
-                      DO 150 I = M,1,-1
-                          TEMP = ALPHA*B(I,J)
-                          DO 140 K = I + 1,M
-                              TEMP = TEMP - A(K,I)*B(K,J)
-  140                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          B(I,J) = TEMP
-  150                 CONTINUE
-  160             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*B*inv( A ).
-              IF (UPPER) THEN
-                  DO 210 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 170 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  170                     CONTINUE
-                      END IF
-                      DO 190 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 180 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  180                         CONTINUE
-                          END IF
-  190                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 200 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  200                     CONTINUE
-                      END IF
-  210             CONTINUE
-              ELSE
-                  DO 260 J = N,1,-1
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 220 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  220                     CONTINUE
-                      END IF
-                      DO 240 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 230 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  230                         CONTINUE
-                          END IF
-  240                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 250 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  250                     CONTINUE
-                      END IF
-  260             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*B*inv( A**T ).
-              IF (UPPER) THEN
-                  DO 310 K = N,1,-1
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(K,K)
-                          DO 270 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  270                     CONTINUE
-                      END IF
-                      DO 290 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = A(J,K)
-                              DO 280 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  280                         CONTINUE
-                          END IF
-  290                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 300 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  300                     CONTINUE
-                      END IF
-  310             CONTINUE
-              ELSE
-                  DO 360 K = 1,N
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(K,K)
-                          DO 320 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  320                     CONTINUE
-                      END IF
-                      DO 340 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = A(J,K)
-                              DO 330 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  330                         CONTINUE
-                          END IF
-  340                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 350 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  350                     CONTINUE
-                      END IF
-  360             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of STRSM .
-      END
diff --git a/superlu/BLAS/strsv.f b/superlu/BLAS/strsv.f
deleted file mode 100644
index a31651b9..00000000
--- a/superlu/BLAS/strsv.f
+++ /dev/null
@@ -1,344 +0,0 @@
-*> \brief \b STRSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> STRSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup single_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 6
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*A(I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 I = J - 1,1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*A(I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 I = J + 1,N
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      DO 90 I = 1,J - 1
-                          TEMP = TEMP - A(I,J)*X(I)
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 110 I = 1,J - 1
-                          TEMP = TEMP - A(I,J)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      DO 130 I = N,J + 1,-1
-                          TEMP = TEMP - A(I,J)*X(I)
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 150 I = N,J + 1,-1
-                          TEMP = TEMP - A(I,J)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of STRSV .
-      END
diff --git a/superlu/BLAS/xerbla.f b/superlu/BLAS/xerbla.f
deleted file mode 100644
index bbe6cceb..00000000
--- a/superlu/BLAS/xerbla.f
+++ /dev/null
@@ -1,89 +0,0 @@
-*> \brief \b XERBLA
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       CHARACTER*(*)      SRNAME
-*       INTEGER            INFO
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> XERBLA  is an error handler for the LAPACK routines.
-*> It is called by an LAPACK routine if an input parameter has an
-*> invalid value.  A message is printed and execution stops.
-*> Installers may consider modifying the STOP statement in order to
-*> call system-specific exception-handling facilities.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SRNAME
-*> \verbatim
-*>          SRNAME is CHARACTER*(*)
-*>          The name of the routine which called XERBLA.
-*> \endverbatim
-*> \param[in] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          The position of the invalid parameter in the parameter list
-*>          of the calling routine.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup aux_blas
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      CHARACTER*(*)      SRNAME
-      INTEGER            INFO
-*     ..
-* =====================================================================
-*     .. Intrinsic Functions ..
-      INTRINSIC          LEN_TRIM
-*     ..
-*     .. Executable Statements ..
-      WRITE( *, FMT = 9999 )SRNAME( 1:LEN_TRIM( SRNAME ) ), INFO
-      STOP
- 9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ',
-     $      'an illegal value' )
-*     End of XERBLA
-      END
diff --git a/superlu/BLAS/xerbla_array.f b/superlu/BLAS/xerbla_array.f
deleted file mode 100644
index df4e6273..00000000
--- a/superlu/BLAS/xerbla_array.f
+++ /dev/null
@@ -1,119 +0,0 @@
-*> \brief \b XERBLA_ARRAY
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> XERBLA_ARRAY assists other languages in calling XERBLA, the LAPACK
-*> and BLAS error handler.  Rather than taking a Fortran string argument
-*> as the function's name, XERBLA_ARRAY takes an array of single
-*> characters along with the array's length.  XERBLA_ARRAY then copies
-*> up to 32 characters of that array into a Fortran string and passes
-*> that to XERBLA.  If called with a non-positive SRNAME_LEN,
-*> XERBLA_ARRAY will call XERBLA with a string of all blank characters.
-*> Say some macro or other device makes XERBLA_ARRAY available to C99
-*> by a name lapack_xerbla and with a common Fortran calling convention.
-*> Then a C99 program could invoke XERBLA via:
-*>    {
-*>      int flen = strlen(__func__);
-*>      lapack_xerbla(__func__, &flen, &info);
-*>    }
-*> Providing XERBLA_ARRAY is not necessary for intercepting LAPACK
-*> errors.  XERBLA_ARRAY calls XERBLA.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SRNAME_ARRAY
-*> \verbatim
-*>          SRNAME_ARRAY is CHARACTER(1) array, dimension (SRNAME_LEN)
-*>          The name of the routine which called XERBLA_ARRAY.
-*> \endverbatim
-*> \param[in] SRNAME_LEN
-*> \verbatim
-*>          SRNAME_LEN is INTEGER
-*>          The length of the name in SRNAME_ARRAY.
-*> \endverbatim
-*> \param[in] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          The position of the invalid parameter in the parameter list
-*>          of the calling routine.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup aux_blas
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-*     ..
-* =====================================================================
-*     ..
-*     .. Local Scalars ..
-      INTEGER I
-*     ..
-*     .. Local Arrays ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. Executable Statements ..
-      SRNAME = ''
-      DO I = 1, MIN( SRNAME_LEN, LEN( SRNAME ) )
-         SRNAME( I:I ) = SRNAME_ARRAY( I )
-      END DO
-      RETURN
-      END
diff --git a/superlu/BLAS/zaxpy.f b/superlu/BLAS/zaxpy.f
deleted file mode 100644
index bca78fb7..00000000
--- a/superlu/BLAS/zaxpy.f
+++ /dev/null
@@ -1,102 +0,0 @@
-*> \brief \b ZAXPY
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ZA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    ZAXPY constant times a vector plus a vector.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      COMPLEX*16 ZA
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (DCABS1(ZA).EQ.0.0d0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-         DO I = 1,N
-            ZY(I) = ZY(I) + ZA*ZX(I)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZY(IY) = ZY(IY) + ZA*ZX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zcopy.f b/superlu/BLAS/zcopy.f
deleted file mode 100644
index 830548ab..00000000
--- a/superlu/BLAS/zcopy.f
+++ /dev/null
@@ -1,94 +0,0 @@
-*> \brief \b ZCOPY
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    ZCOPY copies a vector, x, to a vector, y.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, linpack, 4/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-         DO I = 1,N
-          ZY(I) = ZX(I)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZY(IY) = ZX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zdotc.f b/superlu/BLAS/zdotc.f
deleted file mode 100644
index 70119ec5..00000000
--- a/superlu/BLAS/zdotc.f
+++ /dev/null
@@ -1,103 +0,0 @@
-*> \brief \b ZDOTC
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZDOTC forms the dot product of two complex vectors
-*>      ZDOTC = X^H * Y
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      ZTEMP = (0.0d0,0.0d0)
-      ZDOTC = (0.0d0,0.0d0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-         DO I = 1,N
-            ZTEMP = ZTEMP + DCONJG(ZX(I))*ZY(I)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZTEMP = ZTEMP + DCONJG(ZX(IX))*ZY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      ZDOTC = ZTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/zdotu.f b/superlu/BLAS/zdotu.f
deleted file mode 100644
index 318fae24..00000000
--- a/superlu/BLAS/zdotu.f
+++ /dev/null
@@ -1,100 +0,0 @@
-*> \brief \b ZDOTU
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZDOTU forms the dot product of two complex vectors
-*>      ZDOTU = X^T * Y
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      ZTEMP = (0.0d0,0.0d0)
-      ZDOTU = (0.0d0,0.0d0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*        code for both increments equal to 1
-         DO I = 1,N
-            ZTEMP = ZTEMP + ZX(I)*ZY(I)
-         END DO
-      ELSE
-*        code for unequal increments or equal increments
-*          not equal to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZTEMP = ZTEMP + ZX(IX)*ZY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      ZDOTU = ZTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/zdrot.f b/superlu/BLAS/zdrot.f
deleted file mode 100644
index 8a4cf652..00000000
--- a/superlu/BLAS/zdrot.f
+++ /dev/null
@@ -1,153 +0,0 @@
-*> \brief \b ZDROT
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       INTEGER            INCX, INCY, N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16         CX( * ), CY( * )
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> Applies a plane rotation, where the cos and sin (c and s) are real
-*> and the vectors cx and cy are complex.
-*> jack dongarra, linpack, 3/11/78.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the vectors cx and cy.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in,out] CX
-*> \verbatim
-*>          CX is COMPLEX*16 array, dimension at least
-*>           ( 1 + ( N - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array CX must contain the n
-*>           element vector cx. On exit, CX is overwritten by the updated
-*>           vector cx.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           CX. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] CY
-*> \verbatim
-*>          CY is COMPLEX*16 array, dimension at least
-*>           ( 1 + ( N - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array CY must contain the n
-*>           element vector cy. On exit, CY is overwritten by the updated
-*>           vector cy.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           CY. INCY must not be zero.
-*> \endverbatim
-*> \param[in] C
-*> \verbatim
-*>          C is DOUBLE PRECISION
-*>           On entry, C specifies the cosine, cos.
-*> \endverbatim
-*> \param[in] S
-*> \verbatim
-*>          S is DOUBLE PRECISION
-*>           On entry, S specifies the sine, sin.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      INTEGER            INCX, INCY, N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16         CX( * ), CY( * )
-*     ..
-* =====================================================================
-*     .. Local Scalars ..
-      INTEGER            I, IX, IY
-      COMPLEX*16         CTEMP
-*     ..
-*     .. Executable Statements ..
-      IF( N.LE.0 )
-     $   RETURN
-      IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN
-*        code for both increments equal to 1
-         DO I = 1, N
-            CTEMP = C*CX( I ) + S*CY( I )
-            CY( I ) = C*CY( I ) - S*CX( I )
-            CX( I ) = CTEMP
-         END DO
-      ELSE
-*        code for unequal increments or equal increments not equal
-*          to 1
-         IX = 1
-         IY = 1
-         IF( INCX.LT.0 )
-     $      IX = ( -N+1 )*INCX + 1
-         IF( INCY.LT.0 )
-     $      IY = ( -N+1 )*INCY + 1
-         DO I = 1, N
-            CTEMP = C*CX( IX ) + S*CY( IY )
-            CY( IY ) = C*CY( IY ) - S*CX( IX )
-            CX( IX ) = CTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zdscal.f b/superlu/BLAS/zdscal.f
deleted file mode 100644
index def90785..00000000
--- a/superlu/BLAS/zdscal.f
+++ /dev/null
@@ -1,94 +0,0 @@
-*> \brief \b ZDSCAL
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    ZDSCAL scales a vector by a constant.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         DO I = 1,N
-            ZX(I) = DCMPLX(DA,0.0d0)*ZX(I)
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            ZX(I) = DCMPLX(DA,0.0d0)*ZX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zgbmv.f b/superlu/BLAS/zgbmv.f
deleted file mode 100644
index f49da221..00000000
--- a/superlu/BLAS/zgbmv.f
+++ /dev/null
@@ -1,390 +0,0 @@
-*> \brief \b ZGBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZGBMV  performs one of the matrix-vector operations
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,   or
-*>    y := alpha*A**H*x + beta*y,
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n band matrix, with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>              TRANS = 'C' or 'c'   y := alpha*A**H*x + beta*y.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>           On entry, KL specifies the number of sub-diagonals of the
-*>           matrix A. KL must satisfy  0 .le. KL.
-*> \endverbatim
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>           On entry, KU specifies the number of super-diagonals of the
-*>           matrix A. KU must satisfy  0 .le. KU.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading ( kl + ku + 1 ) by n part of the
-*>           array A must contain the matrix of coefficients, supplied
-*>           column by column, with the leading diagonal of the matrix in
-*>           row ( ku + 1 ) of the array, the first super-diagonal
-*>           starting at position 2 in row ku, the first sub-diagonal
-*>           starting at position 1 in row ( ku + 2 ), and so on.
-*>           Elements in the array A that do not correspond to elements
-*>           in the band matrix (such as the top left ku by ku triangle)
-*>           are not referenced.
-*>           The following program segment will transfer a band matrix
-*>           from conventional full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    K = KU + 1 - J
-*>                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
-*>                       A( K + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( kl + ku + 1 ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (KL.LT.0) THEN
-          INFO = 4
-      ELSE IF (KU.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (KL+KU+1)) THEN
-          INFO = 8
-          INFO = 10
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the band part of A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      KUP1 = KU + 1
-*        Form  y := alpha*A*x + y.
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  K = KUP1 - J
-                  DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(I) = Y(I) + TEMP*A(K+I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  K = KUP1 - J
-                  DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(IY) = Y(IY) + TEMP*A(K+I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-                  IF (J.GT.KU) KY = KY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y := alpha*A**T*x + y  or  y := alpha*A**H*x + y.
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 110 J = 1,N
-                  TEMP = ZERO
-                  K = KUP1 - J
-                  IF (NOCONJ) THEN
-                      DO 90 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + A(K+I,J)*X(I)
-   90                 CONTINUE
-                  ELSE
-                      DO 100 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + DCONJG(A(K+I,J))*X(I)
-  100                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  110         CONTINUE
-          ELSE
-              DO 140 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  K = KUP1 - J
-                  IF (NOCONJ) THEN
-                      DO 120 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + A(K+I,J)*X(IX)
-                          IX = IX + INCX
-  120                 CONTINUE
-                  ELSE
-                      DO 130 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + DCONJG(A(K+I,J))*X(IX)
-                          IX = IX + INCX
-  130                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-                  IF (J.GT.KU) KX = KX + INCX
-  140         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZGBMV .
-      END
diff --git a/superlu/BLAS/zgemm.f b/superlu/BLAS/zgemm.f
deleted file mode 100644
index a1726321..00000000
--- a/superlu/BLAS/zgemm.f
+++ /dev/null
@@ -1,483 +0,0 @@
-*> \brief \b ZGEMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZGEMM  performs one of the matrix-matrix operations
-*>    C := alpha*op( A )*op( B ) + beta*C,
-*> where  op( X ) is one of
-*>    op( X ) = X   or   op( X ) = X**T   or   op( X ) = X**H,
-*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
-*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n',  op( A ) = A.
-*>              TRANSA = 'T' or 't',  op( A ) = A**T.
-*>              TRANSA = 'C' or 'c',  op( A ) = A**H.
-*> \endverbatim
-*> \param[in] TRANSB
-*> \verbatim
-*>          TRANSB is CHARACTER*1
-*>           On entry, TRANSB specifies the form of op( B ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSB = 'N' or 'n',  op( B ) = B.
-*>              TRANSB = 'T' or 't',  op( B ) = B**T.
-*>              TRANSB = 'C' or 'c',  op( B ) = B**H.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies  the number  of rows  of the  matrix
-*>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N  specifies the number  of columns of the matrix
-*>           op( B ) and the number of columns of the matrix C. N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry,  K  specifies  the number of columns of the matrix
-*>           op( A ) and the number of rows of the matrix op( B ). K must
-*>           be at least  zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-*>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by m  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is
-*>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-*>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  n by k  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
-*>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n  matrix
-*>           ( alpha*op( A )*op( B ) + beta*C ).
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
-*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and
-*     B  respectively are to be  transposed but  not conjugated  and set
-*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
-*     and the number of rows of  B  respectively.
-      IF (NOTA) THEN
-          NROWA = M
-          NCOLA = K
-      ELSE
-          NROWA = K
-          NCOLA = M
-      END IF
-      IF (NOTB) THEN
-          NROWB = K
-      ELSE
-          NROWB = N
-      END IF
-*     Test the input parameters.
-      INFO = 0
-      IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND.
-     +    (.NOT.LSAME(TRANSA,'T'))) THEN
-          INFO = 1
-     +         (.NOT.LSAME(TRANSB,'T'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-          INFO = 8
-          INFO = 10
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (NOTB) THEN
-          IF (NOTA) THEN
-*           Form  C := alpha*A*B + beta*C.
-              DO 90 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 50 I = 1,M
-                          C(I,J) = ZERO
-   50                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 60 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-   60                 CONTINUE
-                  END IF
-                  DO 80 L = 1,K
-                      TEMP = ALPHA*B(L,J)
-                      DO 70 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                 CONTINUE
-   80             CONTINUE
-   90         CONTINUE
-          ELSE IF (CONJA) THEN
-*           Form  C := alpha*A**H*B + beta*C.
-              DO 120 J = 1,N
-                  DO 110 I = 1,M
-                      TEMP = ZERO
-                      DO 100 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*B(L,J)
-  100                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  110             CONTINUE
-  120         CONTINUE
-          ELSE
-*           Form  C := alpha*A**T*B + beta*C
-              DO 150 J = 1,N
-                  DO 140 I = 1,M
-                      TEMP = ZERO
-                      DO 130 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(L,J)
-  130                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  140             CONTINUE
-  150         CONTINUE
-          END IF
-          IF (CONJB) THEN
-*           Form  C := alpha*A*B**H + beta*C.
-              DO 200 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 160 I = 1,M
-                          C(I,J) = ZERO
-  160                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 170 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  170                 CONTINUE
-                  END IF
-                  DO 190 L = 1,K
-                      TEMP = ALPHA*DCONJG(B(J,L))
-                      DO 180 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  180                 CONTINUE
-  190             CONTINUE
-  200         CONTINUE
-          ELSE
-*           Form  C := alpha*A*B**T + beta*C
-              DO 250 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 210 I = 1,M
-                          C(I,J) = ZERO
-  210                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 220 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  220                 CONTINUE
-                  END IF
-                  DO 240 L = 1,K
-                      TEMP = ALPHA*B(J,L)
-                      DO 230 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  230                 CONTINUE
-  240             CONTINUE
-  250         CONTINUE
-          END IF
-          IF (CONJB) THEN
-*           Form  C := alpha*A**H*B**H + beta*C.
-              DO 280 J = 1,N
-                  DO 270 I = 1,M
-                      TEMP = ZERO
-                      DO 260 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*DCONJG(B(J,L))
-  260                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  270             CONTINUE
-  280         CONTINUE
-          ELSE
-*           Form  C := alpha*A**H*B**T + beta*C
-              DO 310 J = 1,N
-                  DO 300 I = 1,M
-                      TEMP = ZERO
-                      DO 290 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*B(J,L)
-  290                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  300             CONTINUE
-  310         CONTINUE
-          END IF
-      ELSE
-          IF (CONJB) THEN
-*           Form  C := alpha*A**T*B**H + beta*C
-              DO 340 J = 1,N
-                  DO 330 I = 1,M
-                      TEMP = ZERO
-                      DO 320 L = 1,K
-                          TEMP = TEMP + A(L,I)*DCONJG(B(J,L))
-  320                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  330             CONTINUE
-  340         CONTINUE
-          ELSE
-*           Form  C := alpha*A**T*B**T + beta*C
-              DO 370 J = 1,N
-                  DO 360 I = 1,M
-                      TEMP = ZERO
-                      DO 350 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(J,L)
-  350                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  360             CONTINUE
-  370         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZGEMM .
-      END
diff --git a/superlu/BLAS/zgemv.f b/superlu/BLAS/zgemv.f
deleted file mode 100644
index 01e44d46..00000000
--- a/superlu/BLAS/zgemv.f
+++ /dev/null
@@ -1,350 +0,0 @@
-*> \brief \b ZGEMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZGEMV  performs one of the matrix-vector operations
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,   or
-*>    y := alpha*A**H*x + beta*y,
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>              TRANS = 'C' or 'c'   y := alpha*A**H*x + beta*y.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry with BETA non-zero, the incremented array Y
-*>           must contain the vector y. On exit, Y is overwritten by the
-*>           updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-          INFO = 6
-          INFO = 8
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-*        Form  y := alpha*A*x + y.
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  DO 50 I = 1,M
-                      Y(I) = Y(I) + TEMP*A(I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  DO 70 I = 1,M
-                      Y(IY) = Y(IY) + TEMP*A(I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y := alpha*A**T*x + y  or  y := alpha*A**H*x + y.
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 110 J = 1,N
-                  TEMP = ZERO
-                  IF (NOCONJ) THEN
-                      DO 90 I = 1,M
-                          TEMP = TEMP + A(I,J)*X(I)
-   90                 CONTINUE
-                  ELSE
-                      DO 100 I = 1,M
-                          TEMP = TEMP + DCONJG(A(I,J))*X(I)
-  100                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  110         CONTINUE
-          ELSE
-              DO 140 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  IF (NOCONJ) THEN
-                      DO 120 I = 1,M
-                          TEMP = TEMP + A(I,J)*X(IX)
-                          IX = IX + INCX
-  120                 CONTINUE
-                  ELSE
-                      DO 130 I = 1,M
-                          TEMP = TEMP + DCONJG(A(I,J))*X(IX)
-                          IX = IX + INCX
-  130                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  140         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZGEMV .
-      END
diff --git a/superlu/BLAS/zgerc.f b/superlu/BLAS/zgerc.f
deleted file mode 100644
index cf8e17d3..00000000
--- a/superlu/BLAS/zgerc.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b ZGERC
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZGERC  performs the rank 1 operation
-*>    A := alpha*x*y**H + A,
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*DCONJG(Y(JY))
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*DCONJG(Y(JY))
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-      RETURN
-*     End of ZGERC .
-      END
diff --git a/superlu/BLAS/zgeru.f b/superlu/BLAS/zgeru.f
deleted file mode 100644
index d191740c..00000000
--- a/superlu/BLAS/zgeru.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b ZGERU
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZGERU  performs the rank 1 operation
-*>    A := alpha*x*y**T + A,
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-      RETURN
-*     End of ZGERU .
-      END
diff --git a/superlu/BLAS/zhbmv.f b/superlu/BLAS/zhbmv.f
deleted file mode 100644
index 87422152..00000000
--- a/superlu/BLAS/zhbmv.f
+++ /dev/null
@@ -1,380 +0,0 @@
-*> \brief \b ZHBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHBMV  performs the matrix-vector  operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian band matrix, with k super-diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the band matrix A is being supplied as
-*>           follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  being supplied.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  being supplied.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry, K specifies the number of super-diagonals of the
-*>           matrix A. K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the hermitian matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer the upper
-*>           triangular part of a hermitian band matrix from conventional
-*>           full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the hermitian matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer the lower
-*>           triangular part of a hermitian band matrix from conventional
-*>           full matrix storage to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (K.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 6
-          INFO = 8
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of the array A
-*     are accessed sequentially with one pass through A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when upper triangle of A is stored.
-          KPLUS1 = K + 1
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  L = KPLUS1 - J
-                  DO 50 I = MAX(1,J-K),J - 1
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*DBLE(A(KPLUS1,J)) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  L = KPLUS1 - J
-                  DO 70 I = MAX(1,J-K),J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*DBLE(A(KPLUS1,J)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  IF (J.GT.K) THEN
-                      KX = KX + INCX
-                      KY = KY + INCY
-                  END IF
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when lower triangle of A is stored.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*DBLE(A(1,J))
-                  L = 1 - J
-                  DO 90 I = J + 1,MIN(N,J+K)
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*DBLE(A(1,J))
-                  L = 1 - J
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,MIN(N,J+K)
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZHBMV .
-      END
diff --git a/superlu/BLAS/zhemm.f b/superlu/BLAS/zhemm.f
deleted file mode 100644
index 45a5eabd..00000000
--- a/superlu/BLAS/zhemm.f
+++ /dev/null
@@ -1,371 +0,0 @@
-*> \brief \b ZHEMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHEMM  performs one of the matrix-matrix operations
-*>    C := alpha*A*B + beta*C,
-*> or
-*>    C := alpha*B*A + beta*C,
-*> where alpha and beta are scalars, A is an hermitian matrix and  B and
-*> C are m by n matrices.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  hermitian matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  hermitian  matrix   A  is  to  be
-*>           referenced as follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  hermitian matrix is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  hermitian matrix is to be referenced.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  hermitian matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  hermitian matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  hermitian
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  hermitian matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  hermitian matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  hermitian
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Note that the imaginary parts  of the diagonal elements need
-*>           not be set, they are assumed to be zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least max( 1, n ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     Set NROWA as the number of rows of A.
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     And when
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSAME(SIDE,'L')) THEN
-*        Form  C := alpha*A*B + beta*C.
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*DCONJG(A(K,I))
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*DBLE(A(I,I)) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*DBLE(A(I,I)) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*DCONJG(A(K,I))
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*DBLE(A(I,I)) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*DBLE(A(I,I)) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*B*A + beta*C.
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*DBLE(A(J,J))
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*DCONJG(A(J,K))
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*DCONJG(A(J,K))
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-      RETURN
-*     End of ZHEMM .
-      END
diff --git a/superlu/BLAS/zhemv.f b/superlu/BLAS/zhemv.f
deleted file mode 100644
index 37917459..00000000
--- a/superlu/BLAS/zhemv.f
+++ /dev/null
@@ -1,337 +0,0 @@
-*> \brief \b ZHEMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHEMV  performs the matrix-vector  operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when A is stored in upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(I,J))*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*DBLE(A(J,J)) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 I = 1,J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(I,J))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*DBLE(A(J,J)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when A is stored in lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*DBLE(A(J,J))
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(I,J))*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*DBLE(A(J,J))
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,N
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(I,J))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZHEMV .
-      END
diff --git a/superlu/BLAS/zher.f b/superlu/BLAS/zher.f
deleted file mode 100644
index f7def760..00000000
--- a/superlu/BLAS/zher.f
+++ /dev/null
@@ -1,278 +0,0 @@
-*> \brief \b ZHER
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHER   performs the hermitian rank 1 operation
-*>    A := alpha*x*x**H + A,
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n hermitian matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHER  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set the start point in X if the increment is not unity.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when A is stored in upper triangle.
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(J))
-                      DO 10 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   10                 CONTINUE
-                      A(J,J) = DBLE(A(J,J)) + DBLE(X(J)*TEMP)
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(JX))
-                      IX = KX
-                      DO 30 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                      A(J,J) = DBLE(A(J,J)) + DBLE(X(JX)*TEMP)
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-                  JX = JX + INCX
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when A is stored in lower triangle.
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(J))
-                      A(J,J) = DBLE(A(J,J)) + DBLE(TEMP*X(J))
-                      DO 50 I = J + 1,N
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   50                 CONTINUE
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(JX))
-                      A(J,J) = DBLE(A(J,J)) + DBLE(TEMP*X(JX))
-                      IX = JX
-                      DO 70 I = J + 1,N
-                          IX = IX + INCX
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-   70                 CONTINUE
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZHER  .
-      END
diff --git a/superlu/BLAS/zher2.f b/superlu/BLAS/zher2.f
deleted file mode 100644
index 94c132c4..00000000
--- a/superlu/BLAS/zher2.f
+++ /dev/null
@@ -1,317 +0,0 @@
-*> \brief \b ZHER2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHER2  performs the hermitian rank 2 operation
-*>    A := alpha*x*y**H + conjg( alpha )*y*x**H + A,
-*> where alpha is a scalar, x and y are n element vectors and A is an n
-*> by n hermitian matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHER2 ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when A is stored in the upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(J))
-                      TEMP2 = DCONJG(ALPHA*X(J))
-                      DO 10 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   10                 CONTINUE
-                      A(J,J) = DBLE(A(J,J)) +
-     +                         DBLE(X(J)*TEMP1+Y(J)*TEMP2)
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(JY))
-                      TEMP2 = DCONJG(ALPHA*X(JX))
-                      IX = KX
-                      IY = KY
-                      DO 30 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                      A(J,J) = DBLE(A(J,J)) +
-     +                         DBLE(X(JX)*TEMP1+Y(JY)*TEMP2)
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when A is stored in the lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(J))
-                      TEMP2 = DCONJG(ALPHA*X(J))
-                      A(J,J) = DBLE(A(J,J)) +
-     +                         DBLE(X(J)*TEMP1+Y(J)*TEMP2)
-                      DO 50 I = J + 1,N
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   50                 CONTINUE
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(JY))
-                      TEMP2 = DCONJG(ALPHA*X(JX))
-                      A(J,J) = DBLE(A(J,J)) +
-     +                         DBLE(X(JX)*TEMP1+Y(JY)*TEMP2)
-                      IX = JX
-                      IY = JY
-                      DO 70 I = J + 1,N
-                          IX = IX + INCX
-                          IY = IY + INCY
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-   70                 CONTINUE
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZHER2 .
-      END
diff --git a/superlu/BLAS/zher2k.f b/superlu/BLAS/zher2k.f
deleted file mode 100644
index 407e8db5..00000000
--- a/superlu/BLAS/zher2k.f
+++ /dev/null
@@ -1,443 +0,0 @@
-*> \brief \b ZHER2K
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHER2K  performs one of the hermitian rank 2k operations
-*>    C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,
-*> or
-*>    C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,
-*> where  alpha and beta  are scalars with  beta  real,  C is an  n by n
-*> hermitian matrix and  A and B  are  n by k matrices in the first case
-*> and  k by n  matrices in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'    C := alpha*A*B**H          +
-*>                                         conjg( alpha )*B*A**H +
-*>                                         beta*C.
-*>              TRANS = 'C' or 'c'    C := alpha*A**H*B          +
-*>                                         conjg( alpha )*B**H*A +
-*>                                         beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
-*>           matrices  A and B.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16 .
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*>           Unchanged on exit.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION .
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set,  they are assumed to be zero,  and on exit they
-*>           are set to zero.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*>  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
-*>     Ed Anderson, Cray Research Inc.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0)
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.DBLE(ZERO)) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.DBLE(ZERO)) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      C(J,J) = BETA*DBLE(C(J,J))
-                      DO 70 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*B**H + conjg( alpha )*B*A**H +
-*                   C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.DBLE(ZERO)) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-                  ELSE
-                      C(J,J) = DBLE(C(J,J))
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*DCONJG(B(J,L))
-                          TEMP2 = DCONJG(ALPHA*A(J,L))
-                          DO 110 I = 1,J - 1
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                          C(J,J) = DBLE(C(J,J)) +
-     +                             DBLE(A(J,L)*TEMP1+B(J,L)*TEMP2)
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.DBLE(ZERO)) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-                  ELSE
-                      C(J,J) = DBLE(C(J,J))
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*DCONJG(B(J,L))
-                          TEMP2 = DCONJG(ALPHA*A(J,L))
-                          DO 160 I = J + 1,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                          C(J,J) = DBLE(C(J,J)) +
-     +                             DBLE(A(J,L)*TEMP1+B(J,L)*TEMP2)
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**H*B + conjg( alpha )*B**H*A +
-*                   C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + DCONJG(A(L,I))*B(L,J)
-                          TEMP2 = TEMP2 + DCONJG(B(L,I))*A(L,J)
-  190                 CONTINUE
-                      IF (I.EQ.J) THEN
-                          IF (BETA.EQ.DBLE(ZERO)) THEN
-                              C(J,J) = DBLE(ALPHA*TEMP1+
-     +                                 DCONJG(ALPHA)*TEMP2)
-                          ELSE
-                              C(J,J) = BETA*DBLE(C(J,J)) +
-     +                                 DBLE(ALPHA*TEMP1+
-     +                                 DCONJG(ALPHA)*TEMP2)
-                          END IF
-                      ELSE
-                          IF (BETA.EQ.DBLE(ZERO)) THEN
-                              C(I,J) = ALPHA*TEMP1 + DCONJG(ALPHA)*TEMP2
-                          ELSE
-                              C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                                 DCONJG(ALPHA)*TEMP2
-                          END IF
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + DCONJG(A(L,I))*B(L,J)
-                          TEMP2 = TEMP2 + DCONJG(B(L,I))*A(L,J)
-  220                 CONTINUE
-                      IF (I.EQ.J) THEN
-                          IF (BETA.EQ.DBLE(ZERO)) THEN
-                              C(J,J) = DBLE(ALPHA*TEMP1+
-     +                                 DCONJG(ALPHA)*TEMP2)
-                          ELSE
-                              C(J,J) = BETA*DBLE(C(J,J)) +
-     +                                 DBLE(ALPHA*TEMP1+
-     +                                 DCONJG(ALPHA)*TEMP2)
-                          END IF
-                      ELSE
-                          IF (BETA.EQ.DBLE(ZERO)) THEN
-                              C(I,J) = ALPHA*TEMP1 + DCONJG(ALPHA)*TEMP2
-                          ELSE
-                              C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                                 DCONJG(ALPHA)*TEMP2
-                          END IF
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZHER2K.
-      END
diff --git a/superlu/BLAS/zherk.f b/superlu/BLAS/zherk.f
deleted file mode 100644
index d181ca0a..00000000
--- a/superlu/BLAS/zherk.f
+++ /dev/null
@@ -1,396 +0,0 @@
-*> \brief \b ZHERK
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHERK  performs one of the hermitian rank k operations
-*>    C := alpha*A*A**H + beta*C,
-*> or
-*>    C := alpha*A**H*A + beta*C,
-*> where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
-*> matrix and  A  is an  n by k  matrix in the  first case and a  k by n
-*> matrix in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**H + beta*C.
-*>              TRANS = 'C' or 'c'   C := alpha*A**H*A + beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
-*>           matrix A.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION .
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set,  they are assumed to be zero,  and on exit they
-*>           are set to zero.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*>  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
-*>     Ed Anderson, Cray Research Inc.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. Parameters ..
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      C(J,J) = BETA*DBLE(C(J,J))
-                      DO 70 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*A**H + beta*C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-                  ELSE
-                      C(J,J) = DBLE(C(J,J))
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.DCMPLX(ZERO)) THEN
-                          TEMP = ALPHA*DCONJG(A(J,L))
-                          DO 110 I = 1,J - 1
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                          C(J,J) = DBLE(C(J,J)) + DBLE(TEMP*A(I,L))
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      C(J,J) = BETA*DBLE(C(J,J))
-                      DO 150 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  ELSE
-                      C(J,J) = DBLE(C(J,J))
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.DCMPLX(ZERO)) THEN
-                          TEMP = ALPHA*DCONJG(A(J,L))
-                          C(J,J) = DBLE(C(J,J)) + DBLE(TEMP*A(J,L))
-                          DO 160 I = J + 1,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**H*A + beta*C.
-          IF (UPPER) THEN
-              DO 220 J = 1,N
-                  DO 200 I = 1,J - 1
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-                  RTEMP = ZERO
-                  DO 210 L = 1,K
-                      RTEMP = RTEMP + DCONJG(A(L,J))*A(L,J)
-  210             CONTINUE
-                  IF (BETA.EQ.ZERO) THEN
-                      C(J,J) = ALPHA*RTEMP
-                  ELSE
-                      C(J,J) = ALPHA*RTEMP + BETA*DBLE(C(J,J))
-                  END IF
-  220         CONTINUE
-          ELSE
-              DO 260 J = 1,N
-                  RTEMP = ZERO
-                  DO 230 L = 1,K
-                      RTEMP = RTEMP + DCONJG(A(L,J))*A(L,J)
-  230             CONTINUE
-                  IF (BETA.EQ.ZERO) THEN
-                      C(J,J) = ALPHA*RTEMP
-                  ELSE
-                      C(J,J) = ALPHA*RTEMP + BETA*DBLE(C(J,J))
-                  END IF
-                  DO 250 I = J + 1,N
-                      TEMP = ZERO
-                      DO 240 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*A(L,J)
-  240                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  250             CONTINUE
-  260         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZHERK .
-      END
diff --git a/superlu/BLAS/zhpmv.f b/superlu/BLAS/zhpmv.f
deleted file mode 100644
index 0d5d325b..00000000
--- a/superlu/BLAS/zhpmv.f
+++ /dev/null
@@ -1,338 +0,0 @@
-*> \brief \b ZHPMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHPMV  performs the matrix-vector operation
-*>    y := alpha*A*x + beta*y,
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 6
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set up the start points in  X  and  Y.
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*     First form  y := beta*y.
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  y  when AP contains the upper triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  K = KK
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + DCONJG(AP(K))*X(I)
-                      K = K + 1
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*DBLE(AP(KK+J-1)) + ALPHA*TEMP2
-                  KK = KK + J
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 K = KK,KK + J - 2
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + DCONJG(AP(K))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*DBLE(AP(KK+J-1)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   80         CONTINUE
-          END IF
-      ELSE
-*        Form  y  when AP contains the lower triangle.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*DBLE(AP(KK))
-                  K = KK + 1
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + DCONJG(AP(K))*X(I)
-                      K = K + 1
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-                  KK = KK + (N-J+1)
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*DBLE(AP(KK))
-                  IX = JX
-                  IY = JY
-                  DO 110 K = KK + 1,KK + N - J
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + DCONJG(AP(K))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + (N-J+1)
-  120         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZHPMV .
-      END
diff --git a/superlu/BLAS/zhpr.f b/superlu/BLAS/zhpr.f
deleted file mode 100644
index 70051c8a..00000000
--- a/superlu/BLAS/zhpr.f
+++ /dev/null
@@ -1,279 +0,0 @@
-*> \brief \b ZHPR
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHPR    performs the hermitian rank 1 operation
-*>    A := alpha*x*x**H + A,
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHPR  ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-*     Set the start point in X if the increment is not unity.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when upper triangle is stored in AP.
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(J))
-                      K = KK
-                      DO 10 I = 1,J - 1
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   10                 CONTINUE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1)) + DBLE(X(J)*TEMP)
-                  ELSE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1))
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(JX))
-                      IX = KX
-                      DO 30 K = KK,KK + J - 2
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1)) + DBLE(X(JX)*TEMP)
-                  ELSE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1))
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when lower triangle is stored in AP.
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(J))
-                      AP(KK) = DBLE(AP(KK)) + DBLE(TEMP*X(J))
-                      K = KK + 1
-                      DO 50 I = J + 1,N
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   50                 CONTINUE
-                  ELSE
-                      AP(KK) = DBLE(AP(KK))
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(JX))
-                      AP(KK) = DBLE(AP(KK)) + DBLE(TEMP*X(JX))
-                      IX = JX
-                      DO 70 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          AP(K) = AP(K) + X(IX)*TEMP
-   70                 CONTINUE
-                  ELSE
-                      AP(KK) = DBLE(AP(KK))
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZHPR  .
-      END
diff --git a/superlu/BLAS/zhpr2.f b/superlu/BLAS/zhpr2.f
deleted file mode 100644
index c9fb7585..00000000
--- a/superlu/BLAS/zhpr2.f
+++ /dev/null
@@ -1,318 +0,0 @@
-*> \brief \b ZHPR2
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*),Y(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZHPR2  performs the hermitian rank 2 operation
-*>    A := alpha*x*y**H + conjg( alpha )*y*x**H + A,
-*> where alpha is a scalar, x and y are n element vectors and A is an
-*> n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*),Y(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-          INFO = 5
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHPR2 ',INFO)
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*        Form  A  when upper triangle is stored in AP.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(J))
-                      TEMP2 = DCONJG(ALPHA*X(J))
-                      K = KK
-                      DO 10 I = 1,J - 1
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   10                 CONTINUE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1)) +
-     +                             DBLE(X(J)*TEMP1+Y(J)*TEMP2)
-                  ELSE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1))
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(JY))
-                      TEMP2 = DCONJG(ALPHA*X(JX))
-                      IX = KX
-                      IY = KY
-                      DO 30 K = KK,KK + J - 2
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1)) +
-     +                             DBLE(X(JX)*TEMP1+Y(JY)*TEMP2)
-                  ELSE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*        Form  A  when lower triangle is stored in AP.
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(J))
-                      TEMP2 = DCONJG(ALPHA*X(J))
-                      AP(KK) = DBLE(AP(KK)) +
-     +                         DBLE(X(J)*TEMP1+Y(J)*TEMP2)
-                      K = KK + 1
-                      DO 50 I = J + 1,N
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   50                 CONTINUE
-                  ELSE
-                      AP(KK) = DBLE(AP(KK))
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(JY))
-                      TEMP2 = DCONJG(ALPHA*X(JX))
-                      AP(KK) = DBLE(AP(KK)) +
-     +                         DBLE(X(JX)*TEMP1+Y(JY)*TEMP2)
-                      IX = JX
-                      IY = JY
-                      DO 70 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          IY = IY + INCY
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-   70                 CONTINUE
-                  ELSE
-                      AP(KK) = DBLE(AP(KK))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZHPR2 .
-      END
diff --git a/superlu/BLAS/zrotg.f b/superlu/BLAS/zrotg.f
deleted file mode 100644
index e5c406db..00000000
--- a/superlu/BLAS/zrotg.f
+++ /dev/null
@@ -1,75 +0,0 @@
-*> \brief \b ZROTG
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 CA,CB,S
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    ZROTG determines a double complex Givens rotation.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level1
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      COMPLEX*16 CA,CB,S
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-      IF (CDABS(CA).EQ.0.0d0) THEN
-         C = 0.0d0
-         S = (1.0d0,0.0d0)
-         CA = CB
-      ELSE
-         SCALE = CDABS(CA) + CDABS(CB)
-         NORM = SCALE*DSQRT((CDABS(CA/DCMPLX(SCALE,0.0d0)))**2+
-     $       (CDABS(CB/DCMPLX(SCALE,0.0d0)))**2)
-         ALPHA = CA/CDABS(CA)
-         C = CDABS(CA)/NORM
-         CA = ALPHA*NORM
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zscal.f b/superlu/BLAS/zscal.f
deleted file mode 100644
index ca038aac..00000000
--- a/superlu/BLAS/zscal.f
+++ /dev/null
@@ -1,91 +0,0 @@
-*> \brief \b ZSCAL
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ZA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    ZSCAL scales a vector by a constant.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-      COMPLEX*16 ZA
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-         DO I = 1,N
-            ZX(I) = ZA*ZX(I)
-         END DO
-      ELSE
-*        code for increment not equal to 1
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            ZX(I) = ZA*ZX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zswap.f b/superlu/BLAS/zswap.f
deleted file mode 100644
index 02a5b97e..00000000
--- a/superlu/BLAS/zswap.f
+++ /dev/null
@@ -1,98 +0,0 @@
-*> \brief \b ZSWAP
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*>    ZSWAP interchanges two vectors.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level1
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>     jack dongarra, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*  =====================================================================
-*     .. Local Scalars ..
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*       code for both increments equal to 1
-         DO I = 1,N
-            ZTEMP = ZX(I)
-            ZX(I) = ZY(I)
-            ZY(I) = ZTEMP
-         END DO
-      ELSE
-*       code for unequal increments or equal increments not equal
-*         to 1
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZTEMP = ZX(IX)
-            ZX(IX) = ZY(IY)
-            ZY(IY) = ZTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zsymm.f b/superlu/BLAS/zsymm.f
deleted file mode 100644
index 1dc267a7..00000000
--- a/superlu/BLAS/zsymm.f
+++ /dev/null
@@ -1,369 +0,0 @@
-*> \brief \b ZSYMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZSYMM  performs one of the matrix-matrix operations
-*>    C := alpha*A*B + beta*C,
-*> or
-*>    C := alpha*B*A + beta*C,
-*> where  alpha and beta are scalars, A is a symmetric matrix and  B and
-*> C are m by n matrices.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  symmetric matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  symmetric  matrix   A  is  to  be
-*>           referenced as follows:
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least max( 1, n ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     Set NROWA as the number of rows of A.
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-*     And when
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSAME(SIDE,'L')) THEN
-*        Form  C := alpha*A*B + beta*C.
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*B*A + beta*C.
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*A(J,J)
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*A(J,K)
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(J,K)
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-      RETURN
-*     End of ZSYMM .
-      END
diff --git a/superlu/BLAS/zsyr2k.f b/superlu/BLAS/zsyr2k.f
deleted file mode 100644
index d358ed00..00000000
--- a/superlu/BLAS/zsyr2k.f
+++ /dev/null
@@ -1,396 +0,0 @@
-*> \brief \b ZSYR2K
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZSYR2K  performs one of the symmetric rank 2k operations
-*>    C := alpha*A*B**T + alpha*B*A**T + beta*C,
-*> or
-*>    C := alpha*A**T*B + alpha*B**T*A + beta*C,
-*> where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
-*> and  A and B  are  n by k  matrices  in the  first  case  and  k by n
-*> matrices in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'    C := alpha*A*B**T + alpha*B*A**T +
-*>                                         beta*C.
-*>              TRANS = 'T' or 't'    C := alpha*A**T*B + alpha*B**T*A +
-*>                                         beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'T' or 't',  K  specifies  the number of rows of the
-*>           matrices  A and B.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'T'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 9
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*B**T + alpha*B*A**T + C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**T*B + alpha*B**T*A + C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZSYR2K.
-      END
diff --git a/superlu/BLAS/zsyrk.f b/superlu/BLAS/zsyrk.f
deleted file mode 100644
index 79591b45..00000000
--- a/superlu/BLAS/zsyrk.f
+++ /dev/null
@@ -1,363 +0,0 @@
-*> \brief \b ZSYRK
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),C(LDC,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZSYRK  performs one of the symmetric rank k operations
-*>    C := alpha*A*A**T + beta*C,
-*> or
-*>    C := alpha*A**T*A + beta*C,
-*> where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
-*> and  A  is an  n by k  matrix in the first case and a  k by n  matrix
-*> in the second case.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**T + beta*C.
-*>              TRANS = 'T' or 't'   C := alpha*A**T*A + beta*C.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'T' or 't',  K  specifies  the number of rows of the
-*>           matrix A.  K must be at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),C(LDC,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     Test the input parameters.
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-     +         (.NOT.LSAME(TRANS,'T'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-          INFO = 7
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*     And when
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*     Start the operations.
-*        Form  C := alpha*A*A**T + beta*C.
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*        Form  C := alpha*A**T*A + beta*C.
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP = ZERO
-                      DO 220 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-      RETURN
-*     End of ZSYRK .
-      END
diff --git a/superlu/BLAS/ztbmv.f b/superlu/BLAS/ztbmv.f
deleted file mode 100644
index 1e03f2ba..00000000
--- a/superlu/BLAS/ztbmv.f
+++ /dev/null
@@ -1,429 +0,0 @@
-*> \brief \b ZTBMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZTBMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is (input/output) COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX   too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*         Form  x := A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = KPLUS1 - J
-                          DO 10 I = MAX(1,J-K),J - 1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = KPLUS1 - J
-                          DO 30 I = MAX(1,J-K),J - 1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
-                      END IF
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = 1 - J
-                          DO 50 I = MIN(N,J+K),J + 1,-1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(1,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = 1 - J
-                          DO 70 I = MIN(N,J+K),J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(1,J)
-                      END IF
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x  or  x := A**H*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                          DO 90 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + A(L+I,J)*X(I)
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(KPLUS1,J))
-                          DO 100 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + DCONJG(A(L+I,J))*X(I)
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      KX = KX - INCX
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                          DO 120 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + A(L+I,J)*X(IX)
-                              IX = IX - INCX
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(KPLUS1,J))
-                          DO 130 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + DCONJG(A(L+I,J))*X(IX)
-                              IX = IX - INCX
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(1,J)
-                          DO 150 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + A(L+I,J)*X(I)
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(1,J))
-                          DO 160 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + DCONJG(A(L+I,J))*X(I)
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      KX = KX + INCX
-                      IX = KX
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(1,J)
-                          DO 180 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + A(L+I,J)*X(IX)
-                              IX = IX + INCX
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(1,J))
-                          DO 190 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + DCONJG(A(L+I,J))*X(IX)
-                              IX = IX + INCX
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of ZTBMV .
-      END
diff --git a/superlu/BLAS/ztbsv.f b/superlu/BLAS/ztbsv.f
deleted file mode 100644
index 50c4bb42..00000000
--- a/superlu/BLAS/ztbsv.f
+++ /dev/null
@@ -1,432 +0,0 @@
-*> \brief \b ZTBSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZTBSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular band matrix, with ( k + 1 )
-*> diagonals.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed by sequentially with one pass through A.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,MAX(1,J-K),-1
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 40 J = N,1,-1
-                      KX = KX - INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J)
-                          TEMP = X(JX)
-                          DO 30 I = J - 1,MAX(1,J-K),-1
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX - INCX
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          L = 1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(1,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,MIN(N,J+K)
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      KX = KX + INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = 1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(1,J)
-                          TEMP = X(JX)
-                          DO 70 I = J + 1,MIN(N,J+K)
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX + INCX
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          DO 90 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - A(L+I,J)*X(I)
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      ELSE
-                          DO 100 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - DCONJG(A(L+I,J))*X(I)
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(KPLUS1,J))
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          DO 120 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - A(L+I,J)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      ELSE
-                          DO 130 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - DCONJG(A(L+I,J))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(KPLUS1,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          DO 150 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - A(L+I,J)*X(I)
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      ELSE
-                          DO 160 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - DCONJG(A(L+I,J))*X(I)
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(1,J))
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          DO 180 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - A(L+I,J)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      ELSE
-                          DO 190 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - DCONJG(A(L+I,J))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(1,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of ZTBSV .
-      END
diff --git a/superlu/BLAS/ztpmv.f b/superlu/BLAS/ztpmv.f
deleted file mode 100644
index d9aae425..00000000
--- a/superlu/BLAS/ztpmv.f
+++ /dev/null
@@ -1,388 +0,0 @@
-*> \brief \b ZTPMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZTPMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix, supplied in packed form.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is (input/output) COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*        Form  x:= A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K + 1
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK+J-1)
-                      END IF
-                      KK = KK + J
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 K = KK,KK + J - 2
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1)
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K - 1
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK-N+J)
-                      END IF
-                      KK = KK - (N-J+1)
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 K = KK,KK - (N- (J+1)),-1
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J)
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x  or  x := A**H*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK - 1
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 90 I = J - 1,1,-1
-                              TEMP = TEMP + AP(K)*X(I)
-                              K = K - 1
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK))
-                          DO 100 I = J - 1,1,-1
-                              TEMP = TEMP + DCONJG(AP(K))*X(I)
-                              K = K - 1
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-                      KK = KK - J
-  110             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 120 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + AP(K)*X(IX)
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK))
-                          DO 130 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + DCONJG(AP(K))*X(IX)
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - J
-  140             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      K = KK + 1
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 150 I = J + 1,N
-                              TEMP = TEMP + AP(K)*X(I)
-                              K = K + 1
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK))
-                          DO 160 I = J + 1,N
-                              TEMP = TEMP + DCONJG(AP(K))*X(I)
-                              K = K + 1
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-                      KK = KK + (N-J+1)
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 180 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              TEMP = TEMP + AP(K)*X(IX)
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK))
-                          DO 190 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              TEMP = TEMP + DCONJG(AP(K))*X(IX)
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of ZTPMV .
-      END
diff --git a/superlu/BLAS/ztpsv.f b/superlu/BLAS/ztpsv.f
deleted file mode 100644
index 5874fdc4..00000000
--- a/superlu/BLAS/ztpsv.f
+++ /dev/null
@@ -1,390 +0,0 @@
-*> \brief \b ZTPSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZTPSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix, supplied in packed form.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK - 1
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K - 1
-   10                     CONTINUE
-                      END IF
-                      KK = KK - J
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK + 1
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K + 1
-   50                     CONTINUE
-                      END IF
-                      KK = KK + (N-J+1)
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      K = KK
-                      IF (NOCONJ) THEN
-                          DO 90 I = 1,J - 1
-                              TEMP = TEMP - AP(K)*X(I)
-                              K = K + 1
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      ELSE
-                          DO 100 I = 1,J - 1
-                              TEMP = TEMP - DCONJG(AP(K))*X(I)
-                              K = K + 1
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK+J-1))
-                      END IF
-                      X(J) = TEMP
-                      KK = KK + J
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      IF (NOCONJ) THEN
-                          DO 120 K = KK,KK + J - 2
-                              TEMP = TEMP - AP(K)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      ELSE
-                          DO 130 K = KK,KK + J - 2
-                              TEMP = TEMP - DCONJG(AP(K))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK+J-1))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + J
-  140             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK
-                      IF (NOCONJ) THEN
-                          DO 150 I = N,J + 1,-1
-                              TEMP = TEMP - AP(K)*X(I)
-                              K = K - 1
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      ELSE
-                          DO 160 I = N,J + 1,-1
-                              TEMP = TEMP - DCONJG(AP(K))*X(I)
-                              K = K - 1
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK-N+J))
-                      END IF
-                      X(J) = TEMP
-                      KK = KK - (N-J+1)
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      IF (NOCONJ) THEN
-                          DO 180 K = KK,KK - (N- (J+1)),-1
-                              TEMP = TEMP - AP(K)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      ELSE
-                          DO 190 K = KK,KK - (N- (J+1)),-1
-                              TEMP = TEMP - DCONJG(AP(K))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK-N+J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of ZTPSV .
-      END
diff --git a/superlu/BLAS/ztrmm.f b/superlu/BLAS/ztrmm.f
deleted file mode 100644
index 229f3322..00000000
--- a/superlu/BLAS/ztrmm.f
+++ /dev/null
@@ -1,452 +0,0 @@
-*> \brief \b ZTRMM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZTRMM  performs one of the matrix-matrix operations
-*>    B := alpha*op( A )*B,   or   B := alpha*B*op( A )
-*> where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>    op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE specifies whether  op( A ) multiplies B from
-*>           the left or right as follows:
-*>              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
-*>              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>              TRANSA = 'C' or 'c'   op( A ) = A**H.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m
-*>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*> \param[in] B
-*> \verbatim
-*>          B is (input/output) COMPLEX*16 array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain the matrix  B,  and  on exit  is overwritten  by the
-*>           transformed matrix.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     Test the input parameters.
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-          INFO = 2
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-          INFO = 9
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*     And when
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*A*B.
-              IF (UPPER) THEN
-                  DO 50 J = 1,N
-                      DO 40 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              DO 30 I = 1,K - 1
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   30                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP*A(K,K)
-                              B(K,J) = TEMP
-                          END IF
-   40                 CONTINUE
-   50             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              B(K,J) = TEMP
-                              IF (NOUNIT) B(K,J) = B(K,J)*A(K,K)
-                              DO 60 I = K + 1,M
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   60                         CONTINUE
-                          END IF
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*A**T*B   or   B := alpha*A**H*B.
-              IF (UPPER) THEN
-                  DO 120 J = 1,N
-                      DO 110 I = M,1,-1
-                          TEMP = B(I,J)
-                          IF (NOCONJ) THEN
-                              IF (NOUNIT) TEMP = TEMP*A(I,I)
-                              DO 90 K = 1,I - 1
-                                  TEMP = TEMP + A(K,I)*B(K,J)
-   90                         CONTINUE
-                          ELSE
-                              IF (NOUNIT) TEMP = TEMP*DCONJG(A(I,I))
-                              DO 100 K = 1,I - 1
-                                  TEMP = TEMP + DCONJG(A(K,I))*B(K,J)
-  100                         CONTINUE
-                          END IF
-                          B(I,J) = ALPHA*TEMP
-  110                 CONTINUE
-  120             CONTINUE
-              ELSE
-                  DO 160 J = 1,N
-                      DO 150 I = 1,M
-                          TEMP = B(I,J)
-                          IF (NOCONJ) THEN
-                              IF (NOUNIT) TEMP = TEMP*A(I,I)
-                              DO 130 K = I + 1,M
-                                  TEMP = TEMP + A(K,I)*B(K,J)
-  130                         CONTINUE
-                          ELSE
-                              IF (NOUNIT) TEMP = TEMP*DCONJG(A(I,I))
-                              DO 140 K = I + 1,M
-                                  TEMP = TEMP + DCONJG(A(K,I))*B(K,J)
-  140                         CONTINUE
-                          END IF
-                          B(I,J) = ALPHA*TEMP
-  150                 CONTINUE
-  160             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*B*A.
-              IF (UPPER) THEN
-                  DO 200 J = N,1,-1
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 170 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  170                 CONTINUE
-                      DO 190 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 180 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  180                         CONTINUE
-                          END IF
-  190                 CONTINUE
-  200             CONTINUE
-              ELSE
-                  DO 240 J = 1,N
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 210 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  210                 CONTINUE
-                      DO 230 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 220 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  220                         CONTINUE
-                          END IF
-  230                 CONTINUE
-  240             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*B*A**T   or   B := alpha*B*A**H.
-              IF (UPPER) THEN
-                  DO 280 K = 1,N
-                      DO 260 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = ALPHA*A(J,K)
-                              ELSE
-                                  TEMP = ALPHA*DCONJG(A(J,K))
-                              END IF
-                              DO 250 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  250                         CONTINUE
-                          END IF
-  260                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = TEMP*A(K,K)
-                          ELSE
-                              TEMP = TEMP*DCONJG(A(K,K))
-                          END IF
-                      END IF
-                      IF (TEMP.NE.ONE) THEN
-                          DO 270 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  270                     CONTINUE
-                      END IF
-  280             CONTINUE
-              ELSE
-                  DO 320 K = N,1,-1
-                      DO 300 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = ALPHA*A(J,K)
-                              ELSE
-                                  TEMP = ALPHA*DCONJG(A(J,K))
-                              END IF
-                              DO 290 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  290                         CONTINUE
-                          END IF
-  300                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = TEMP*A(K,K)
-                          ELSE
-                              TEMP = TEMP*DCONJG(A(K,K))
-                          END IF
-                      END IF
-                      IF (TEMP.NE.ONE) THEN
-                          DO 310 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  310                     CONTINUE
-                      END IF
-  320             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of ZTRMM .
-      END
diff --git a/superlu/BLAS/ztrmv.f b/superlu/BLAS/ztrmv.f
deleted file mode 100644
index ab9065cf..00000000
--- a/superlu/BLAS/ztrmv.f
+++ /dev/null
@@ -1,373 +0,0 @@
-*> \brief \b ZTRMV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZTRMV  performs one of the matrix-vector operations
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in] X
-*> \verbatim
-*>          X is (input/output) COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 6
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*        Form  x := A*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*A(I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 I = 1,J - 1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*A(I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 I = N,J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := A**T*x  or  x := A**H*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 90 I = J - 1,1,-1
-                              TEMP = TEMP + A(I,J)*X(I)
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J))
-                          DO 100 I = J - 1,1,-1
-                              TEMP = TEMP + DCONJG(A(I,J))*X(I)
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 120 I = J - 1,1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + A(I,J)*X(IX)
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J))
-                          DO 130 I = J - 1,1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + DCONJG(A(I,J))*X(IX)
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 150 I = J + 1,N
-                              TEMP = TEMP + A(I,J)*X(I)
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J))
-                          DO 160 I = J + 1,N
-                              TEMP = TEMP + DCONJG(A(I,J))*X(I)
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 180 I = J + 1,N
-                              IX = IX + INCX
-                              TEMP = TEMP + A(I,J)*X(IX)
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J))
-                          DO 190 I = J + 1,N
-                              IX = IX + INCX
-                              TEMP = TEMP + DCONJG(A(I,J))*X(IX)
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of ZTRMV .
-      END
diff --git a/superlu/BLAS/ztrsm.f b/superlu/BLAS/ztrsm.f
deleted file mode 100644
index cc1af763..00000000
--- a/superlu/BLAS/ztrsm.f
+++ /dev/null
@@ -1,477 +0,0 @@
-*> \brief \b ZTRSM
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZTRSM  solves one of the matrix equations
-*>    op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
-*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>    op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H.
-*> The matrix X is overwritten on B.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry, SIDE specifies whether op( A ) appears on the left
-*>           or right of X as follows:
-*>              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
-*>              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
-*> \endverbatim
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>              TRANSA = 'C' or 'c'   op( A ) = A**H.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, k ),
-*>           where k is m when SIDE = 'L' or 'l'
-*>             and k is n when SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*> \param[in,out] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain  the  right-hand  side  matrix  B,  and  on exit  is
-*>           overwritten by the solution matrix  X.
-*> \endverbatim
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level3
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 3 Blas routine.
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*)
-*     ..
-*  =====================================================================
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     Test the input parameters.
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-      INFO = 0
-          INFO = 1
-          INFO = 2
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-          INFO = 9
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*     And when
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*     Start the operations.
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*inv( A )*B.
-              IF (UPPER) THEN
-                  DO 60 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 30 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   30                     CONTINUE
-                      END IF
-                      DO 50 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 40 I = 1,K - 1
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   40                         CONTINUE
-                          END IF
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 100 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 70 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   70                     CONTINUE
-                      END IF
-                      DO 90 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 80 I = K + 1,M
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   80                         CONTINUE
-                          END IF
-   90                 CONTINUE
-  100             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*inv( A**T )*B
-*           or    B := alpha*inv( A**H )*B.
-              IF (UPPER) THEN
-                  DO 140 J = 1,N
-                      DO 130 I = 1,M
-                          TEMP = ALPHA*B(I,J)
-                          IF (NOCONJ) THEN
-                              DO 110 K = 1,I - 1
-                                  TEMP = TEMP - A(K,I)*B(K,J)
-  110                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          ELSE
-                              DO 120 K = 1,I - 1
-                                  TEMP = TEMP - DCONJG(A(K,I))*B(K,J)
-  120                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/DCONJG(A(I,I))
-                          END IF
-                          B(I,J) = TEMP
-  130                 CONTINUE
-  140             CONTINUE
-              ELSE
-                  DO 180 J = 1,N
-                      DO 170 I = M,1,-1
-                          TEMP = ALPHA*B(I,J)
-                          IF (NOCONJ) THEN
-                              DO 150 K = I + 1,M
-                                  TEMP = TEMP - A(K,I)*B(K,J)
-  150                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          ELSE
-                              DO 160 K = I + 1,M
-                                  TEMP = TEMP - DCONJG(A(K,I))*B(K,J)
-  160                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/DCONJG(A(I,I))
-                          END IF
-                          B(I,J) = TEMP
-  170                 CONTINUE
-  180             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*           Form  B := alpha*B*inv( A ).
-              IF (UPPER) THEN
-                  DO 230 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 190 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  190                     CONTINUE
-                      END IF
-                      DO 210 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 200 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  200                         CONTINUE
-                          END IF
-  210                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 220 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  220                     CONTINUE
-                      END IF
-  230             CONTINUE
-              ELSE
-                  DO 280 J = N,1,-1
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 240 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  240                     CONTINUE
-                      END IF
-                      DO 260 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 250 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  250                         CONTINUE
-                          END IF
-  260                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 270 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  270                     CONTINUE
-                      END IF
-  280             CONTINUE
-              END IF
-          ELSE
-*           Form  B := alpha*B*inv( A**T )
-*           or    B := alpha*B*inv( A**H ).
-              IF (UPPER) THEN
-                  DO 330 K = N,1,-1
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = ONE/A(K,K)
-                          ELSE
-                              TEMP = ONE/DCONJG(A(K,K))
-                          END IF
-                          DO 290 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  290                     CONTINUE
-                      END IF
-                      DO 310 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = A(J,K)
-                              ELSE
-                                  TEMP = DCONJG(A(J,K))
-                              END IF
-                              DO 300 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  300                         CONTINUE
-                          END IF
-  310                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 320 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  320                     CONTINUE
-                      END IF
-  330             CONTINUE
-              ELSE
-                  DO 380 K = 1,N
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = ONE/A(K,K)
-                          ELSE
-                              TEMP = ONE/DCONJG(A(K,K))
-                          END IF
-                          DO 340 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  340                     CONTINUE
-                      END IF
-                      DO 360 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = A(J,K)
-                              ELSE
-                                  TEMP = DCONJG(A(J,K))
-                              END IF
-                              DO 350 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  350                         CONTINUE
-                          END IF
-  360                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 370 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  370                     CONTINUE
-                      END IF
-  380             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of ZTRSM .
-      END
diff --git a/superlu/BLAS/ztrsv.f b/superlu/BLAS/ztrsv.f
deleted file mode 100644
index 577b5cae..00000000
--- a/superlu/BLAS/ztrsv.f
+++ /dev/null
@@ -1,375 +0,0 @@
-*> \brief \b ZTRSV
-*  =========== DOCUMENTATION ===========
-* Online html documentation available at
-*  Definition:
-*  ===========
-*       .. Scalar Arguments ..
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*> \par Purpose:
-*  =============
-*> \verbatim
-*> ZTRSV  solves one of the systems of equations
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix.
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*  Arguments:
-*  ==========
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*  Authors:
-*  ========
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*> \date December 2016
-*> \ingroup complex16_blas_level2
-*> \par Further Details:
-*  =====================
-*> \verbatim
-*>  Level 2 Blas routine.
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*  =====================================================================
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*     .. Scalar Arguments ..
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*  =====================================================================
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-*     ..
-*     .. External Functions ..
-*     ..
-*     .. External Subroutines ..
-*     ..
-*     .. Intrinsic Functions ..
-*     ..
-*     Test the input parameters.
-      INFO = 0
-          INFO = 1
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-          INFO = 6
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          RETURN
-      END IF
-*     Quick return if possible.
-      IF (N.EQ.0) RETURN
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-          KX = 1
-      END IF
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*        Form  x := inv( A )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*A(I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 I = J - 1,1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*A(I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 I = J + 1,N
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          DO 90 I = 1,J - 1
-                              TEMP = TEMP - A(I,J)*X(I)
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 100 I = 1,J - 1
-                              TEMP = TEMP - DCONJG(A(I,J))*X(I)
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(J,J))
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      IX = KX
-                      TEMP = X(JX)
-                      IF (NOCONJ) THEN
-                          DO 120 I = 1,J - 1
-                              TEMP = TEMP - A(I,J)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 130 I = 1,J - 1
-                              TEMP = TEMP - DCONJG(A(I,J))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(J,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          DO 150 I = N,J + 1,-1
-                              TEMP = TEMP - A(I,J)*X(I)
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 160 I = N,J + 1,-1
-                              TEMP = TEMP - DCONJG(A(I,J))*X(I)
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(J,J))
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      IX = KX
-                      TEMP = X(JX)
-                      IF (NOCONJ) THEN
-                          DO 180 I = N,J + 1,-1
-                              TEMP = TEMP - A(I,J)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 190 I = N,J + 1,-1
-                              TEMP = TEMP - DCONJG(A(I,J))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(J,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-      RETURN
-*     End of ZTRSV .
-      END
diff --git a/superlu/BLAS_f2c.h b/superlu/BLAS_f2c.h
deleted file mode 100644
index ad4552a4..00000000
--- a/superlu/BLAS_f2c.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/* f2c.h  --  Standard Fortran to C header file */
-// Copyright (C) 2004 
-// Christian Stimming <>
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public License as
-// published by the Free Software Foundation; either version 2, or (at
-// your option) any later version.
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// GNU Lesser General Public License for more details.
-// You should have received a copy of the GNU Lesser General Public License 
-// with this library; see the file COPYING.  If not, write to the Free
-// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
-/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
-       - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
-#ifndef F2C_INCLUDE
-#define F2C_INCLUDE
-typedef int integer;
-typedef char *address;
-typedef short int shortint;
-typedef float real;
-typedef double doublereal;
-typedef struct { real r, i; } complex;
-typedef struct { doublereal r, i; } doublecomplex;
-typedef int logical;
-typedef short int shortlogical;
-typedef char logical1;
-typedef char integer1;
-#define TRUE_ (1)
-#define FALSE_ (0)
-/* Extern is for use with -E */
-#ifndef Extern
-#define Extern extern
-/* I/O stuff */
-#ifdef f2c_i2
-/* for -i2 */
-typedef short flag;
-typedef short ftnlen;
-typedef short ftnint;
-typedef int flag;
-typedef int ftnlen;
-typedef int ftnint;
-/*external read, write*/
-typedef struct
-{      flag cierr;
-       ftnint ciunit;
-       flag ciend;
-       char *cifmt;
-       ftnint cirec;
-} cilist;
-/*internal read, write*/
-typedef struct
-{      flag icierr;
-       char *iciunit;
-       flag iciend;
-       char *icifmt;
-       ftnint icirlen;
-       ftnint icirnum;
-} icilist;
-typedef struct
-{      flag oerr;
-       ftnint ounit;
-       char *ofnm;
-       ftnlen ofnmlen;
-       char *osta;
-       char *oacc;
-       char *ofm;
-       ftnint orl;
-       char *oblnk;
-} olist;
-typedef struct
-{      flag cerr;
-       ftnint cunit;
-       char *csta;
-} cllist;
-/*rewind, backspace, endfile*/
-typedef struct
-{      flag aerr;
-       ftnint aunit;
-} alist;
-/* inquire */
-typedef struct
-{      flag inerr;
-       ftnint inunit;
-       char *infile;
-       ftnlen infilen;
-       ftnint  *inex;  /*parameters in standard's order*/
-       ftnint  *inopen;
-       ftnint  *innum;
-       ftnint  *innamed;
-       char    *inname;
-       ftnlen  innamlen;
-       char    *inacc;
-       ftnlen  inacclen;
-       char    *inseq;
-       ftnlen  inseqlen;
-       char    *indir;
-       ftnlen  indirlen;
-       char    *infmt;
-       ftnlen  infmtlen;
-       char    *inform;
-       ftnint  informlen;
-       char    *inunf;
-       ftnlen  inunflen;
-       ftnint  *inrecl;
-       ftnint  *innrec;
-       char    *inblank;
-       ftnlen  inblanklen;
-} inlist;
-#define VOID void
-union Multitype {      /* for multiple entry points */
-       shortint h;
-       integer i;
-       real r;
-       doublereal d;
-       complex c;
-       doublecomplex z;
-       };
-typedef union Multitype Multitype;
-typedef long Long;     /* No longer used; formerly in Namelist */
-struct Vardesc {       /* for Namelist */
-       char *name;
-       char *addr;
-       ftnlen *dims;
-       int  type;
-       };
-typedef struct Vardesc Vardesc;
-struct Namelist {
-       char *name;
-       Vardesc **vars;
-       int nvars;
-       };
-typedef struct Namelist Namelist;
-#ifndef abs
-#define abs(x) ((x) >= 0 ? (x) : -(x))
-#define dabs(x) (doublereal)abs(x)
-#ifndef min
-#define min(a,b) ((a) <= (b) ? (a) : (b))
-#ifndef max
-#define max(a,b) ((a) >= (b) ? (a) : (b))
-#define dmin(a,b) (doublereal)min(a,b)
-#define dmax(a,b) (doublereal)max(a,b)
-/* procedure parameter types for -A and -C++ */
-#define F2C_proc_par_types 1
-#ifdef __cplusplus
-typedef int /* Unknown procedure type */ (*U_fp)(...);
-typedef shortint (*J_fp)(...);
-typedef integer (*I_fp)(...);
-typedef real (*R_fp)(...);
-typedef doublereal (*D_fp)(...), (*E_fp)(...);
-typedef /* Complex */ VOID (*C_fp)(...);
-typedef /* Double Complex */ VOID (*Z_fp)(...);
-typedef logical (*L_fp)(...);
-typedef shortlogical (*K_fp)(...);
-typedef /* Character */ VOID (*H_fp)(...);
-typedef /* Subroutine */ int (*S_fp)(...);
-typedef int /* Unknown procedure type */ (*U_fp)();
-typedef shortint (*J_fp)();
-typedef integer (*I_fp)();
-typedef real (*R_fp)();
-typedef doublereal (*D_fp)(), (*E_fp)();
-typedef /* Complex */ VOID (*C_fp)();
-typedef /* Double Complex */ VOID (*Z_fp)();
-typedef logical (*L_fp)();
-typedef shortlogical (*K_fp)();
-typedef /* Character */ VOID (*H_fp)();
-typedef /* Subroutine */ int (*S_fp)();
-/* E_fp is for real functions when -R is not specified */
-typedef VOID C_f;      /* complex function */
-typedef VOID H_f;      /* character function */
-typedef VOID Z_f;      /* double complex function */
-typedef doublereal E_f;        /* real function with -R not specified */
-/* undef any lower-case symbols that your C compiler predefines, e.g.: */
-#ifndef Skip_f2c_Undefs
-#undef cray
-#undef gcos
-#undef mc68010
-#undef mc68020
-#undef mips
-#undef pdp11
-#undef sgi
-#undef sparc
-#undef sun
-#undef sun2
-#undef sun3
-#undef sun4
-#undef u370
-#undef u3b
-#undef u3b2
-#undef u3b5
-#undef unix
-#undef vax
diff --git a/superlu/License.txt b/superlu/License.txt
deleted file mode 100644
index b5b5b0c6..00000000
--- a/superlu/License.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-Redistribution and use in source and binary forms, with or without 
-modification, are permitted provided that the following conditions are met: 
-(1) Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer. 
-(2) Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution. 
-(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
-Energy nor the names of its contributors may be used to endorse or promote
-products derived from this software without specific prior written permission.
diff --git a/superlu/ b/superlu/
deleted file mode 100644
index 198642f1..00000000
--- a/superlu/
+++ /dev/null
@@ -1,329 +0,0 @@
-# Copyright (c) 2003, The Regents of the University of California, through
-# Lawrence Berkeley National Laboratory (subject to receipt of any required 
-# approvals from U.S. Dept. of Energy) 
-# All rights reserved. 
-# The source code is distributed under BSD license, see the file License.txt
-#  Copyright (C) 2004-2020 Yves Renard
-#  This file is a part of GetFEM++
-#  GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
-#  under  the  terms  of the  GNU  Lesser General Public License as published
-#  by  the  Free Software Foundation;  either version 3 of the License,  or
-#  (at your option) any later version along with the GCC Runtime Library
-#  Exception either version 3.1 or (at your option) any later version.
-#  This program  is  distributed  in  the  hope  that it will be useful,  but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-#  License and GCC Runtime Library Exception for more details.
-#  You  should  have received a copy of the GNU Lesser General Public License
-#  along  with  this program;  if not, write to the Free Software Foundation,
-#  Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
-       slu_Cnames.h slu_dcomplex.h slu_scomplex.h slu_util.h\
-       supermatrix.h colamd.h slu_cdefs.h slu_ddefs.h\
-       slu_sdefs.h slu_zdefs.h
-BLASLITEFILES=BLAS.c BLAS_f2c.h f2c_lite.c
-SRC = ccolumn_bmod.c \
-       ccolumn_dfs.c \
-       ccopy_to_ucol.c \
-       cgscon.c \
-       cgsequ.c \
-       cgsrfs.c \
-       cgssv.c \
-       cgssvx.c \
-       cgstrf.c \
-       cgstrs.c \
-       clacon.c \
-       clangs.c \
-       claqgs.c \
-       cmemory.c \
-       cmyblas2.c \
-       colamd.c \
-       cpanel_bmod.c \
-       cpanel_dfs.c \
-cpivotgrowth.c \
-cpivotL.c \
-cpruneL.c \
-creadhb.c \
-csnode_bmod.c \
-csnode_dfs.c \
-csp_blas2.c \
-csp_blas3.c \
-cutil.c \
-dcolumn_bmod.c \
-dcolumn_dfs.c \
-dcomplex.c \
-dcopy_to_ucol.c \
-dgscon.c \
-dgsequ.c \
-dgsrfs.c \
-dgssv.c \
-dgssvx.c \
-dgstrf.c \
-dgstrs.c \
-dlacon.c \
-dlamch.c \
-dlangs.c \
-dlaqgs.c \
-dmemory.c \
-dmyblas2.c \
-dpanel_bmod.c \
-dpanel_dfs.c \
-dpivotgrowth.c \
-dpivotL.c \
-dpruneL.c \
-dreadhb.c \
-dsnode_bmod.c \
-dsnode_dfs.c \
-dsp_blas2.c \
-dsp_blas3.c \
-dutil.c \
-dzsum1.c \
-get_perm_c.c \
-heap_relax_snode.c \
-icmax1.c \
-izmax1.c \
-lsame.c \
-memory.c \
-mmd.c \
-relax_snode.c \
-scolumn_bmod.c \
-scolumn_dfs.c \
-scomplex.c \
-scopy_to_ucol.c \
-scsum1.c \
-sgscon.c \
-sgsequ.c \
-sgsrfs.c \
-sgssv.c \
-sgssvx.c \
-sgstrf.c \
-sgstrs.c \
-slacon.c \
-slamch.c \
-slangs.c \
-slaqgs.c \
-smemory.c \
-smyblas2.c \
-spanel_bmod.c \
-spanel_dfs.c \
-sp_coletree.c \
-sp_ienv.c \
-spivotgrowth.c \
-spivotL.c \
-sp_preorder.c \
-spruneL.c \
-sreadhb.c \
-ssnode_bmod.c \
-ssnode_dfs.c \
-ssp_blas2.c \
-ssp_blas3.c \
-superlu_timer.c \
-sutil.c \
-util.c \
-zcolumn_bmod.c \
-zcolumn_dfs.c \
-zcopy_to_ucol.c \
-zgscon.c \
-zgsequ.c \
-zgsrfs.c \
-zgssv.c \
-zgssvx.c \
-zgstrf.c \
-zgstrs.c \
-zlacon.c \
-zlangs.c \
-zlaqgs.c \
-zmemory.c \
-zmyblas2.c \
-zpanel_bmod.c \
-zpanel_dfs.c \
-zpivotgrowth.c \
-zpivotL.c \
-zpruneL.c \
-zreadhb.c \
-zsnode_bmod.c \
-zsnode_dfs.c \
-zsp_blas2.c \
-zsp_blas3.c \
-#vire: xerbla.c 
-libsuperlu_la_SOURCES = $(SRC)
-CLEANFILES = ii_files/* *.o.d
-EXTRA_DIST=License.txt \
-BLAS/License.txt \
-BLAS/caxpy.f \
-BLAS/ccopy.f \
-BLAS/cdotc.f \
-BLAS/cdotu.f \
-BLAS/cgbmv.f \
-BLAS/cgemm.f \
-BLAS/cgemv.f \
-BLAS/cgerc.f \
-BLAS/cgeru.f \
-BLAS/chbmv.f \
-BLAS/chemm.f \
-BLAS/chemv.f \
-BLAS/cher2.f \
-BLAS/cher2k.f \
-BLAS/cher.f \
-BLAS/cherk.f \
-BLAS/chpmv.f \
-BLAS/chpr2.f \
-BLAS/chpr.f \
-BLAS/crotg.f \
-BLAS/cscal.f \
-BLAS/csrot.f \
-BLAS/csscal.f \
-BLAS/cswap.f \
-BLAS/csymm.f \
-BLAS/csyr2k.f \
-BLAS/csyrk.f \
-BLAS/ctbmv.f \
-BLAS/ctbsv.f \
-BLAS/ctpmv.f \
-BLAS/ctpsv.f \
-BLAS/ctrmm.f \
-BLAS/ctrmv.f \
-BLAS/ctrsm.f \
-BLAS/ctrsv.f \
-BLAS/dasum.f \
-BLAS/daxpy.f \
-BLAS/dcabs1.f \
-BLAS/dcopy.f \
-BLAS/ddot.f \
-BLAS/dgbmv.f \
-BLAS/dgemm.f \
-BLAS/dgemv.f \
-BLAS/dger.f \
-BLAS/dnrm2.f \
-BLAS/drot.f \
-BLAS/drotg.f \
-BLAS/drotm.f \
-BLAS/drotmg.f \
-BLAS/dsbmv.f \
-BLAS/dscal.f \
-BLAS/dsdot.f \
-BLAS/dspmv.f \
-BLAS/dspr2.f \
-BLAS/dspr.f \
-BLAS/dswap.f \
-BLAS/dsymm.f \
-BLAS/dsymv.f \
-BLAS/dsyr2.f \
-BLAS/dsyr2k.f \
-BLAS/dsyr.f \
-BLAS/dsyrk.f \
-BLAS/dtbmv.f \
-BLAS/dtbsv.f \
-BLAS/dtpmv.f \
-BLAS/dtpsv.f \
-BLAS/dtrmm.f \
-BLAS/dtrmv.f \
-BLAS/dtrsm.f \
-BLAS/dtrsv.f \
-BLAS/dzasum.f \
-BLAS/dznrm2.f \
-BLAS/icamax.f \
-BLAS/idamax.f \
-BLAS/isamax.f \
-BLAS/izamax.f \
-BLAS/lsame.f \
-BLAS/sasum.f \
-BLAS/saxpy.f \
-BLAS/scabs1.f \
-BLAS/scasum.f \
-BLAS/scnrm2.f \
-BLAS/scopy.f \
-BLAS/sdot.f \
-BLAS/sdsdot.f \
-BLAS/sgbmv.f \
-BLAS/sgemm.f \
-BLAS/sgemv.f \
-BLAS/sger.f \
-BLAS/snrm2.f \
-BLAS/srot.f \
-BLAS/srotg.f \
-BLAS/srotm.f \
-BLAS/srotmg.f \
-BLAS/ssbmv.f \
-BLAS/sscal.f \
-BLAS/sspmv.f \
-BLAS/sspr2.f \
-BLAS/sspr.f \
-BLAS/sswap.f \
-BLAS/ssymm.f \
-BLAS/ssymv.f \
-BLAS/ssyr2.f \
-BLAS/ssyr2k.f \
-BLAS/ssyr.f \
-BLAS/ssyrk.f \
-BLAS/stbmv.f \
-BLAS/stbsv.f \
-BLAS/stpmv.f \
-BLAS/stpsv.f \
-BLAS/strmm.f \
-BLAS/strmv.f \
-BLAS/strsm.f \
-BLAS/strsv.f \
-BLAS/xerbla_array.f \
-BLAS/xerbla.f \
-BLAS/zaxpy.f \
-BLAS/zcopy.f \
-BLAS/zdotc.f \
-BLAS/zdotu.f \
-BLAS/zdrot.f \
-BLAS/zdscal.f \
-BLAS/zgbmv.f \
-BLAS/zgemm.f \
-BLAS/zgemv.f \
-BLAS/zgerc.f \
-BLAS/zgeru.f \
-BLAS/zhbmv.f \
-BLAS/zhemm.f \
-BLAS/zhemv.f \
-BLAS/zher2.f \
-BLAS/zher2k.f \
-BLAS/zher.f \
-BLAS/zherk.f \
-BLAS/zhpmv.f \
-BLAS/zhpr2.f \
-BLAS/zhpr.f \
-BLAS/zrotg.f \
-BLAS/zscal.f \
-BLAS/zswap.f \
-BLAS/zsymm.f \
-BLAS/zsyr2k.f \
-BLAS/zsyrk.f \
-BLAS/ztbmv.f \
-BLAS/ztbsv.f \
-BLAS/ztpmv.f \
-BLAS/ztpsv.f \
-BLAS/ztrmm.f \
-BLAS/ztrmv.f \
-BLAS/ztrsm.f \
diff --git a/superlu/ccolumn_bmod.c b/superlu/ccolumn_bmod.c
deleted file mode 100644
index d6341af1..00000000
--- a/superlu/ccolumn_bmod.c
+++ /dev/null
@@ -1,362 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_cdefs.h"
-extern void ctrsv_();
-extern void cgemv_();
- * Function prototypes 
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-/* Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-ccolumn_bmod (
-            const int  jcol,     /* in */
-            const int  nseg,     /* in */
-            complex     *dense,          /* in */
-            complex     *tempv,          /* working array */
-            int        *segrep,  /* in */
-            int        *repfnz,  /* in */
-            int        fpanelc,  /* in -- first column in the current panel */
-            GlobalLU_t *Glu,     /* modified */
-            SuperLUStat_t *stat  /* output */
-            )
- * Purpose:
- * ========
- *    Performs numeric block updates (sup-col) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-    int         incx = 1, incy = 1;
-    complex      alpha, beta;
-    /* krep = representative of current k-th supernode
-     * fsupc = first supernodal column
-     * nsupc = no of columns in supernode
-     * nsupr = no of rows in supernode (used as leading dimension)
-     * luptr = location of supernodal LU-block in storage
-     * kfnz = first nonz in the k-th supernodal segment
-     * no_zeros = no of leading zeros in a supernodal U-segment
-     */
-    complex       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          fsupc, nsupc, nsupr, segsze;
-    int          nrow;   /* No of rows in the matrix of matrix-vector */
-    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
-    register int lptr, kfnz, isub, irow, i;
-    register int no_zeros, new_next; 
-    int          ufirst, nextlu;
-    int          fst_col; /* First column within small LU update */
-    int          d_fsupc; /* Distance between the first column of the current
-                            panel and the first column of the current snode. */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    complex       *lusup;
-    int          *xlusup;
-    int          nzlumax;
-    complex       *tempv1;
-    complex      zero = {0.0, 0.0};
-    complex      one = {1.0, 0.0};
-    complex      none = {-1.0, 0.0};
-    complex     comp_temp, comp_temp1;
-    int          mem_error;
-    flops_t      *ops = stat->ops;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nzlumax = Glu->nzlumax;
-    jcolp1 = jcol + 1;
-    jsupno = supno[jcol];
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k];
-       k--;
-       ksupno = supno[krep];
-       if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
-           fsupc = xsup[ksupno];
-           fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-           /* Distance from the current supernode to the current panel; 
-              d_fsupc=0 if fsupc > fpanelc. */
-           d_fsupc = fst_col - fsupc; 
-           luptr = xlusup[fst_col] + d_fsupc;
-           lptr = xlsub[fsupc] + d_fsupc;
-           kfnz = repfnz[krep];
-           kfnz = SUPERLU_MAX ( kfnz, fpanelc );
-           segsze = krep - kfnz + 1;
-           nsupc = krep - fst_col + 1;
-           nsupr = xlsub[fsupc+1] - xlsub[fsupc];      /* Leading dimension */
-           nrow = nsupr - d_fsupc - nsupc;
-           krep_ind = lptr + nsupc - 1;
-           /* 
-            * Case 1: Update U-segment of size 1 -- col-col update 
-            */
-           if ( segsze == 1 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc;
-               for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                   irow = lsub[i];
-                   cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                   c_sub(&dense[irow], &dense[irow], &comp_temp);
-                   luptr++;
-               }
-           } else if ( segsze <= 3 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc-1;
-               ukj1 = dense[lsub[krep_ind - 1]];
-               luptr1 = luptr - nsupr;
-               if ( segsze == 2 ) { /* Case 2: 2cols-col update */
-                   cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                   c_sub(&ukj, &ukj, &comp_temp);
-                   dense[lsub[krep_ind]] = ukj;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       c_sub(&dense[irow], &dense[irow], &comp_temp);
-                   }
-               } else { /* Case 3: 3cols-col update */
-                   ukj2 = dense[lsub[krep_ind - 2]];
-                   luptr2 = luptr1 - nsupr;
-                   cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                   c_sub(&ukj1, &ukj1, &comp_temp);
-                   cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                   cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                   c_add(&comp_temp, &comp_temp, &comp_temp1);
-                   c_sub(&ukj, &ukj, &comp_temp);
-                   dense[lsub[krep_ind]] = ukj;
-                   dense[lsub[krep_ind-1]] = ukj1;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       luptr2++;
-                       cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       c_sub(&dense[irow], &dense[irow], &comp_temp);
-                   }
-               }
-           } else {
-               /*
-                * Case: sup-col update
-                * Perform a triangular solve and block update,
-                * then scatter the result of sup-col update to dense
-                */
-               no_zeros = kfnz - fst_col;
-               /* Copy U[*,j] segment from dense[*] to tempv[*] */
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   tempv[i] = dense[irow];
-                   ++isub; 
-               }
-               /* Dense triangular solve -- start effective triangle */
-               luptr += nsupr * no_zeros + no_zeros; 
-#ifdef _CRAY
-               CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-               ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-                alpha = one;
-                beta = zero;
-#ifdef _CRAY
-               CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-               cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-               clsolve ( nsupr, segsze, &lusup[luptr], tempv );
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-               cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
-                /* Scatter tempv[] into SPA dense[] as a temporary storage */
-                isub = lptr + no_zeros;
-                for (i = 0; i < segsze; i++) {
-                    irow = lsub[isub];
-                    dense[irow] = tempv[i];
-                    tempv[i] = zero;
-                    ++isub;
-                }
-               /* Scatter tempv1[] into SPA dense[] */
-               for (i = 0; i < nrow; i++) {
-                   irow = lsub[isub];
-                   c_sub(&dense[irow], &dense[irow], &tempv1[i]);
-                   tempv1[i] = zero;
-                   ++isub;
-               }
-           }
-       } /* if jsupno ... */
-    } /* for each segment... */
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    nextlu = xlusup[jcol];
-    fsupc = xsup[jsupno];
-    /* Copy the SPA dense into L\U[*,j] */
-    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
-    while ( new_next > nzlumax ) {
-       if (mem_error = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
-           return (mem_error);
-       lusup = Glu->lusup;
-       lsub = Glu->lsub;
-    }
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = zero;
-       ++nextlu;
-    }
-    xlusup[jcolp1] = nextlu;   /* Close L\U[*,jcol] */
-    /* For more updates within the panel (also within the current supernode), 
-     * should start from the first column of the panel, or the first column 
-     * of the supernode, whichever is bigger. There are 2 cases:
-     *    1) fsupc < fpanelc, then fst_col := fpanelc
-     *    2) fsupc >= fpanelc, then fst_col := fsupc
-     */
-    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-    if ( fst_col < jcol ) {
-       /* Distance between the current supernode and the current panel.
-          d_fsupc=0 if fsupc >= fpanelc. */
-       d_fsupc = fst_col - fsupc;
-       lptr = xlsub[fsupc] + d_fsupc;
-       luptr = xlusup[fst_col] + d_fsupc;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
-       nsupc = jcol - fst_col; /* Excluding jcol */
-       nrow = nsupr - d_fsupc - nsupc;
-       /* Points to the beginning of jcol in snode L\U(jsupno) */
-       ufirst = xlusup[jcol] + d_fsupc;        
-       ops[TRSV] += 4 * nsupc * (nsupc - 1);
-       ops[GEMV] += 8 * nrow * nsupc;
-#ifdef _CRAY
-       CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-       ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-       alpha = none; beta = one; /* y := beta*y + alpha*A*x */
-#ifdef _CRAY
-       CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
-               &lusup[ufirst], tempv );
-        /* Copy updates from tempv[*] into lusup[*] */
-       isub = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           c_sub(&lusup[isub], &lusup[isub], &tempv[i]);
-           tempv[i] = zero;
-           ++isub;
-       }
-    } /* if fst_col < jcol ... */ 
-    return 0;
diff --git a/superlu/ccolumn_dfs.c b/superlu/ccolumn_dfs.c
deleted file mode 100644
index e60ba5af..00000000
--- a/superlu/ccolumn_dfs.c
+++ /dev/null
@@ -1,266 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_cdefs.h"
-/* What type of supernodes we want */
-#define T2_SUPER
-          const int  m,         /* in - number of rows in the matrix */
-          const int  jcol,      /* in */
-          int        *perm_r,   /* in */
-          int        *nseg,     /* modified - with new segments appended */
-          int        *lsub_col, /* in - defines the RHS vector to start the dfs */
-          int        *segrep,   /* modified - with new segments appended */
-          int        *repfnz,   /* modified */
-          int        *xprune,   /* modified */
-          int        *marker,   /* modified */
-          int        *parent,   /* working array */
-          int        *xplore,   /* working array */
-          GlobalLU_t *Glu       /* modified */
-          )
- * Purpose
- * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
- *   decide the supernode boundary.
- *
- *   This routine does not use numeric values, but only use the RHS 
- *   row indices to start the dfs.
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives. The routine returns a list of such supernodal 
- *   representatives in topological order of the dfs that generates them.
- *   The location of the first nonzero in each such supernodal segment
- *   (supernodal entry location) is also returned.
- *
- * Local parameters
- * ================
- *   nseg: no of segments in current U[*,j]
- *   jsuper: jsuper=EMPTY if column j does not belong to the same
- *     supernode as j-1. Otherwise, jsuper=nsuper.
- *
- *   marker2: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- * Return value
- * ============
- *     0  success;
- *   > 0  number of bytes allocated when run out of space.
- *
- */
-    int     jcolp1, jcolm1, jsuper, nsuper, nextl;
-    int     k, krep, krow, kmark, kperm;
-    int     *marker2;           /* Used for small panel LU */
-    int            fsupc;              /* First column of a snode */
-    int     myfnz;             /* First nonz column of a U-segment */
-    int            chperm, chmark, chrep, kchild;
-    int     xdfs, maxdfs, kpar, oldrep;
-    int     jptr, jm1ptr;
-    int     ito, ifrom, istop; /* Used to compress row subscripts */
-    int     mem_error;
-    int     *xsup, *supno, *lsub, *xlsub;
-    int     nzlmax;
-    static  int  first = 1, maxsuper;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       first = 0;
-    }
-    jcolp1  = jcol + 1;
-    jcolm1  = jcol - 1;
-    nsuper  = supno[jcol];
-    jsuper  = nsuper;
-    nextl   = xlsub[jcol];
-    marker2 = &marker[2*m];
-    /* For each nonzero in A[*,jcol] do dfs */
-    for (k = 0; lsub_col[k] != EMPTY; k++) {
-       krow = lsub_col[k];
-       lsub_col[k] = EMPTY;
-       kmark = marker2[krow];          
-       /* krow was visited before, go to the next nonz */
-        if ( kmark == jcol ) continue; 
-       /* For each unmarked nbr krow of jcol
-        *      krow is in L: place it in structure of L[*,jcol]
-        */
-       marker2[krow] = jcol;
-       kperm = perm_r[krow];
-       if ( kperm == EMPTY ) {
-           lsub[nextl++] = krow;       /* krow is indexed into A */
-           if ( nextl >= nzlmax ) {
-               if ( mem_error = cLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                   return (mem_error);
-               lsub = Glu->lsub;
-           }
-            if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */
-       } else {
-           /*  krow is in U: if its supernode-rep krep
-            *  has been explored, update repfnz[*]
-            */
-           krep = xsup[supno[kperm]+1] - 1;
-           myfnz = repfnz[krep];
-           if ( myfnz != EMPTY ) {     /* Visited before */
-               if ( myfnz > kperm ) repfnz[krep] = kperm;
-               /* continue; */
-           }
-           else {
-               /* Otherwise, perform dfs starting at krep */
-               oldrep = EMPTY;
-               parent[krep] = oldrep;
-               repfnz[krep] = kperm;
-               xdfs = xlsub[krep];
-               maxdfs = xprune[krep];
-               do {
-                   /* 
-                    * For each unmarked kchild of krep 
-                    */
-                   while ( xdfs < maxdfs ) {
-                       kchild = lsub[xdfs];
-                       xdfs++;
-                       chmark = marker2[kchild];
-                       if ( chmark != jcol ) { /* Not reached yet */
-                           marker2[kchild] = jcol;
-                           chperm = perm_r[kchild];
-                           /* Case kchild is in L: place it in L[*,k] */
-                           if ( chperm == EMPTY ) {
-                               lsub[nextl++] = kchild;
-                               if ( nextl >= nzlmax ) {
-                                   if ( mem_error =
cLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) )
-                                       return (mem_error);
-                                   lsub = Glu->lsub;
-                               }
-                               if ( chmark != jcolm1 ) jsuper = EMPTY;
-                           } else {
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               chrep = xsup[supno[chperm]+1] - 1;
-                               myfnz = repfnz[chrep];
-                               if ( myfnz != EMPTY ) { /* Visited before */
-                                   if ( myfnz > chperm )
-                                       repfnz[chrep] = chperm;
-                               } else {
-                                   /* Continue dfs at super-rep of kchild */
-                                   xplore[krep] = xdfs;        
-                                   oldrep = krep;
-                                   krep = chrep; /* Go deeper down G(L^t) */
-                                   parent[krep] = oldrep;
-                                   repfnz[krep] = chperm;
-                                   xdfs = xlsub[krep];     
-                                   maxdfs = xprune[krep];
-                               } /* else */
-                          } /* else */
-                       } /* if */
-                   } /* while */
-                   /* krow has no more unexplored nbrs;
-                    *    place supernode-rep krep in postorder DFS.
-                    *    backtrack dfs to its parent
-                    */
-                   segrep[*nseg] = krep;
-                   ++(*nseg);
-                   kpar = parent[krep]; /* Pop from stack, mimic recursion */
-                   if ( kpar == EMPTY ) break; /* dfs done */
-                   krep = kpar;
-                   xdfs = xplore[krep];
-                   maxdfs = xprune[krep];
-               } while ( kpar != EMPTY );      /* Until empty stack */
-           } /* else */
-       } /* else */
-    } /* for each nonzero ... */
-    /* Check to see if j belongs in the same supernode as j-1 */
-    if ( jcol == 0 ) { /* Do nothing for column 0 */
-       nsuper = supno[0] = 0;
-    } else {
-       fsupc = xsup[nsuper];
-       jptr = xlsub[jcol];     /* Not compressed yet */
-       jm1ptr = xlsub[jcolm1];
-#ifdef T2_SUPER
-       if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
-       /* Make sure the number of columns in a supernode doesn't
-          exceed threshold. */
-       if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
-       /* If jcol starts a new supernode, reclaim storage space in
-        * lsub from the previous supernode. Note we only store
-        * the subscript set of the first and last columns of
-        * a supernode. (first for num values, last for pruning)
-        */
-       if ( jsuper == EMPTY ) {        /* starts a new supernode */
-           if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
-               printf("  Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
-               ito = xlsub[fsupc+1];
-               xlsub[jcolm1] = ito;
-               istop = ito + jptr - jm1ptr;
-               xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
-               xlsub[jcol] = istop;
-               for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
-                   lsub[ito] = lsub[ifrom];
-               nextl = ito;            /* = istop + length(jcol) */
-           }
-           nsuper++;
-           supno[jcol] = nsuper;
-       } /* if a new supernode */
-    }  /* else: jcol > 0 */ 
-    /* Tidy up the pointers before exit */
-    xsup[nsuper+1] = jcolp1;
-    supno[jcolp1]  = nsuper;
-    xprune[jcol]   = nextl;    /* Initialize upper bound for pruning */
-    xlsub[jcolp1]  = nextl;
-    return 0;
diff --git a/superlu/ccopy_to_ucol.c b/superlu/ccopy_to_ucol.c
deleted file mode 100644
index a0554a96..00000000
--- a/superlu/ccopy_to_ucol.c
+++ /dev/null
@@ -1,112 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_cdefs.h"
-             int        jcol,    /* in */
-             int        nseg,    /* in */
-             int        *segrep,  /* in */
-             int        *repfnz,  /* in */
-             int        *perm_r,  /* in */
-             complex     *dense,   /* modified - reset to zero on return */
-             GlobalLU_t *Glu      /* modified */
-             )
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int ksub, krep, ksupno;
-    int i, k, kfnz, segsze;
-    int fsupc, isub, irow;
-    int jsupno, nextu;
-    int new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    complex    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-    complex zero = {0.0, 0.0};
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k--];
-       ksupno = supno[krep];
-       if ( ksupno != jsupno ) { /* Should go into ucol[] */
-           kfnz = repfnz[krep];
-           if ( kfnz != EMPTY ) {      /* Nonzero U-segment */
-               fsupc = xsup[ksupno];
-               isub = xlsub[fsupc] + kfnz - fsupc;
-               segsze = krep - kfnz + 1;
-               new_next = nextu + segsze;
-               while ( new_next > nzumax ) {
-                   if (mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax, 
-                       return (mem_error);
-                   ucol = Glu->ucol;
-                   if (mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax, 
-                       return (mem_error);
-                   usub = Glu->usub;
-                   lsub = Glu->lsub;
-               }
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   usub[nextu] = perm_r[irow];
-                   ucol[nextu] = dense[irow];
-                   dense[irow] = zero;
-                   nextu++;
-                   isub++;
-               } 
-           }
-       }
-    } /* for each segment... */
-    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
-    return 0;
diff --git a/superlu/cgscon.c b/superlu/cgscon.c
deleted file mode 100644
index 5bd3c49a..00000000
--- a/superlu/cgscon.c
+++ /dev/null
@@ -1,155 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-  Copyright (c) 2003 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  cgscon.c
- * History:     Modified from lapack routines CGECON.
- */
-#include <math.h>
-#include "slu_cdefs.h"
-cgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
-       float anorm, float *rcond, SuperLUStat_t *stat, int *info)
-    Purpose   
-    =======   
-    CGSCON estimates the reciprocal of the condition number of a general 
-    real matrix A, in either the 1-norm or the infinity-norm, using   
-    the LU factorization computed by CGETRF.   
-    An estimate is obtained for norm(inv(A)), and the reciprocal of the   
-    condition number is computed as   
-       RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    NORM    (input) char*
-            Specifies whether the 1-norm condition number or the   
-            infinity-norm condition number is required:   
-            = '1' or 'O':  1-norm;   
-            = 'I':         Infinity-norm.
-    L       (input) SuperMatrix*
-            The factor L from the factorization Pr*A*Pc=L*U as computed by
-            cgstrf(). Use compressed row subscripts storage for supernodes,
-            i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
-    U       (input) SuperMatrix*
-            The factor U from the factorization Pr*A*Pc=L*U as computed by
-            cgstrf(). Use column-wise storage scheme, i.e., U has types:
-            Stype = SLU_NC, Dtype = SLU_C, Mtype = TRU.
-    ANORM   (input) float
-            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
-            If NORM = 'I', the infinity-norm of the original matrix A.
-    RCOND   (output) float*
-            The reciprocal of the condition number of the matrix A,   
-            computed as RCOND = 1/(norm(A) * norm(inv(A))).
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-    ===================================================================== 
-    /* Local variables */
-    int    kase, kase1, onenrm, i;
-    float ainvnm;
-    complex *work;
-    extern int crscl_(int *, complex *, complex *, int *);
-    extern int clacon_(int *, complex *, complex *, float *, int *);
-    /* Test the input parameters. */
-    *info = 0;
-    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
-    if (! onenrm && ! lsame_(norm, "I")) *info = -1;
-    else if (L->nrow < 0 || L->nrow != L->ncol ||
-             L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU)
-        *info = -2;
-    else if (U->nrow < 0 || U->nrow != U->ncol ||
-             U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU) 
-       *info = -3;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("cgscon", &i);
-       return;
-    }
-    /* Quick return if possible */
-    *rcond = 0.;
-    if ( L->nrow == 0 || U->nrow == 0) {
-       *rcond = 1.;
-       return;
-    }
-    work = complexCalloc( 3*L->nrow );
-    if ( !work )
-       ABORT("Malloc fails for work arrays in cgscon.");
-    /* Estimate the norm of inv(A). */
-    ainvnm = 0.;
-    if ( onenrm ) kase1 = 1;
-    else kase1 = 2;
-    kase = 0;
-    do {
-       clacon_(&L->nrow, &work[L->nrow], &work[0], &ainvnm, &kase);
-       if (kase == 0) break;
-       if (kase == kase1) {
-           /* Multiply by inv(L). */
-           sp_ctrsv("L", "No trans", "Unit", L, U, &work[0], stat, info);
-           /* Multiply by inv(U). */
-           sp_ctrsv("U", "No trans", "Non-unit", L, U, &work[0], stat, info);
-       } else {
-           /* Multiply by inv(U'). */
-           sp_ctrsv("U", "Transpose", "Non-unit", L, U, &work[0], stat, info);
-           /* Multiply by inv(L'). */
-           sp_ctrsv("L", "Transpose", "Unit", L, U, &work[0], stat, info);
-       }
-    } while ( kase != 0 );
-    /* Compute the estimate of the reciprocal condition number. */
-    if (ainvnm != 0.) *rcond = (1. / ainvnm) / anorm;
-    SUPERLU_FREE (work);
-    return;
-} /* cgscon */
diff --git a/superlu/cgsequ.c b/superlu/cgsequ.c
deleted file mode 100644
index 9dbacef3..00000000
--- a/superlu/cgsequ.c
+++ /dev/null
@@ -1,205 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  cgsequ.c
- * History:     Modified from LAPACK routine CGEEQU
- */
-#include <math.h>
-#include "slu_cdefs.h"
-cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
-       float *colcnd, float *amax, int *info)
-    Purpose   
-    =======   
-    CGSEQU computes row and column scalings intended to equilibrate an   
-    M-by-N sparse matrix A and reduce its condition number. R returns the row
-    scale factors and C the column scale factors, chosen to try to make   
-    the largest element in each row and column of the matrix B with   
-    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
-    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
-    number and BIGNUM = largest safe number.  Use of these scaling   
-    factors is not guaranteed to reduce the condition number of A but   
-    works well in practice.   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    A       (input) SuperMatrix*
-            The matrix of dimension (A->nrow, A->ncol) whose equilibration
-            factors are to be computed. The type of A can be:
-            Stype = SLU_NC; Dtype = SLU_C; Mtype = SLU_GE.
-    R       (output) float*, size A->nrow
-            If INFO = 0 or INFO > M, R contains the row scale factors   
-            for A.
-    C       (output) float*, size A->ncol
-            If INFO = 0,  C contains the column scale factors for A.
-    ROWCND  (output) float*
-            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
-            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
-            AMAX is neither too large nor too small, it is not worth   
-            scaling by R.
-    COLCND  (output) float*
-            If INFO = 0, COLCND contains the ratio of the smallest   
-            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
-            worth scaling by C.
-    AMAX    (output) float*
-            Absolute value of largest matrix element.  If AMAX is very   
-            close to overflow or very close to underflow, the matrix   
-            should be scaled.
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-            > 0:  if INFO = i,  and i is   
-                  <= A->nrow:  the i-th row of A is exactly zero   
-                  >  A->ncol:  the (i-M)-th column of A is exactly zero   
-    ===================================================================== 
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    int i, j, irow;
-    float rcmin, rcmax;
-    float bignum, smlnum;
-    extern double slamch_(char *);
-    /* Test the input parameters. */
-    *info = 0;
-    if ( A->nrow < 0 || A->ncol < 0 ||
-        A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
-       *info = -1;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("cgsequ", &i);
-       return;
-    }
-    /* Quick return if possible */
-    if ( A->nrow == 0 || A->ncol == 0 ) {
-       *rowcnd = 1.;
-       *colcnd = 1.;
-       *amax = 0.;
-       return;
-    }
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Get machine constants. */
-    smlnum = slamch_("S");
-    bignum = 1. / smlnum;
-    /* Compute row scale factors. */
-    for (i = 0; i < A->nrow; ++i) r[i] = 0.;
-    /* Find the maximum element in each row. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           r[irow] = SUPERLU_MAX( r[irow], c_abs1(&Aval[i]) );
-       }
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (i = 0; i < A->nrow; ++i) {
-       rcmax = SUPERLU_MAX(rcmax, r[i]);
-       rcmin = SUPERLU_MIN(rcmin, r[i]);
-    }
-    *amax = rcmax;
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (i = 0; i < A->nrow; ++i)
-           if (r[i] == 0.) {
-               *info = i + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (i = 0; i < A->nrow; ++i)
-           r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
-       /* Compute ROWCND = min(R(I)) / max(R(I)) */
-       *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-    /* Compute column scale factors */
-    for (j = 0; j < A->ncol; ++j) c[j] = 0.;
-    /* Find the maximum element in each column, assuming the row
-       scalings computed above. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           c[j] = SUPERLU_MAX( c[j], c_abs1(&Aval[i]) * r[irow] );
-       }
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (j = 0; j < A->ncol; ++j) {
-       rcmax = SUPERLU_MAX(rcmax, c[j]);
-       rcmin = SUPERLU_MIN(rcmin, c[j]);
-    }
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (j = 0; j < A->ncol; ++j)
-           if ( c[j] == 0. ) {
-               *info = A->nrow + j + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (j = 0; j < A->ncol; ++j)
-           c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
-       /* Compute COLCND = min(C(J)) / max(C(J)) */
-       *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-    return;
-} /* cgsequ */
diff --git a/superlu/cgsrfs.c b/superlu/cgsrfs.c
deleted file mode 100644
index 7b06feec..00000000
--- a/superlu/cgsrfs.c
+++ /dev/null
@@ -1,457 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:  cgsrfs.c
- * History:     Modified from lapack routine CGERFS
- */
-#include <math.h>
-#include "slu_cdefs.h"
-cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
-       int *perm_c, int *perm_r, char *equed, float *R, float *C,
-       SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
-       SuperLUStat_t *stat, int *info)
- *   Purpose   
- *   =======   
- *
- *   CGSRFS improves the computed solution to a system of linear   
- *   equations and provides error bounds and backward error estimates for 
- *   the solution.   
- *
- *   If equilibration was performed, the system becomes:
- *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- *   Arguments   
- *   =========   
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *   
- *   A       (input) SuperMatrix*
- *           The original matrix A in the system, or the scaled A if
- *           equilibration was done. The type of A can be:
- *           Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_GE.
- *    
- *   L       (input) SuperMatrix*
- *          The factor L from the factorization Pr*A*Pc=L*U. Use
- *           compressed row subscripts storage for supernodes, 
- *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- * 
- *   U       (input) SuperMatrix*
- *           The factor U from the factorization Pr*A*Pc=L*U as computed by
- *           cgstrf(). Use column-wise storage scheme, 
- *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- *   perm_c  (input) int*, dimension (A->ncol)
- *          Column permutation vector, which defines the 
- *           permutation matrix Pc; perm_c[i] = j means column i of A is 
- *           in position j in A*Pc.
- *
- *   perm_r  (input) int*, dimension (A->nrow)
- *           Row permutation vector, which defines the permutation matrix Pr;
- *           perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- *   equed   (input) Specifies the form of equilibration that was done.
- *           = 'N': No equilibration.
- *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *           = 'C': Column equilibration, i.e., A was postmultiplied by
- *                  diag(C).
- *           = 'B': Both row and column equilibration, i.e., A was replaced 
- *                  by diag(R)*A*diag(C).
- *
- *   R       (input) float*, dimension (A->nrow)
- *           The row scale factors for A.
- *           If equed = 'R' or 'B', A is premultiplied by diag(R).
- *           If equed = 'N' or 'C', R is not accessed.
- * 
- *   C       (input) float*, dimension (A->ncol)
- *           The column scale factors for A.
- *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
- *           If equed = 'N' or 'R', C is not accessed.
- *
- *   B       (input) SuperMatrix*
- *           B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *           The right hand side matrix B.
- *           if equed = 'R' or 'B', B is premultiplied by diag(R).
- *
- *   X       (input/output) SuperMatrix*
- *           X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *           On entry, the solution matrix X, as computed by cgstrs().
- *           On exit, the improved solution matrix X.
- *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
- *               in order to obtain the solution to the original system.
- *
- *   FERR    (output) float*, dimension (B->ncol)   
- *           The estimated forward error bound for each solution vector   
- *           X(j) (the j-th column of the solution matrix X).   
- *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *           is an estimated upper bound for the magnitude of the largest 
- *           element in (X(j) - XTRUE) divided by the magnitude of the   
- *           largest element in X(j).  The estimate is as reliable as   
- *           the estimate for RCOND, and is almost always a slight   
- *           overestimate of the true error.
- *
- *   BERR    (output) float*, dimension (B->ncol)   
- *           The componentwise relative backward error of each solution   
- *           vector X(j) (i.e., the smallest relative change in   
- *           any element of A or B that makes X(j) an exact solution).
- *
- *   stat     (output) SuperLUStat_t*
- *            Record the statistics on runtime and floating-point operation count.
- *            See util.h for the definition of 'SuperLUStat_t'.
- *
- *   info    (output) int*   
- *           = 0:  successful exit   
- *            < 0:  if INFO = -i, the i-th argument had an illegal value   
- *
- *    Internal Parameters   
- *    ===================   
- *
- *    ITMAX is the maximum number of steps of iterative refinement.   
- *
- */  
-#define ITMAX 5
-    /* Table of constant values */
-    int    ione = 1;
-    complex ndone = {-1., 0.};
-    complex done = {1., 0.};
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    SuperMatrix Bjcol;
-    DNformat *Bstore, *Xstore, *Bjcol_store;
-    complex   *Bmat, *Xmat, *Bptr, *Xptr;
-    int      kase;
-    float   safe1, safe2;
-    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
-    int      ldb, ldx, nrhs;
-    float   s, xk, lstres, eps, safmin;
-    char     transc[1];
-    trans_t  transt;
-    complex   *work;
-    float   *rwork;
-    int      *iwork;
-    extern double slamch_(char *);
-    extern int clacon_(int *, complex *, complex *, float *, int *);
-#ifdef _CRAY
-    extern int CCOPY(int *, complex *, int *, complex *, int *);
-    extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *);
-    extern int ccopy_(int *, complex *, int *, complex *, int *);
-    extern int caxpy_(int *, complex *, complex *, int *, complex *, int *);
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    /* Test the input parameters */
-    *info = 0;
-    notran = (trans == NOTRANS);
-    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
-       *info = -3;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
-       *info = -4;
-    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
-        *info = -10;
-    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
-             X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE )
-       *info = -11;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("cgsrfs", &i);
-       return;
-    }
-    /* Quick return if possible */
-    if ( A->nrow == 0 || nrhs == 0) {
-       for (j = 0; j < nrhs; ++j) {
-           ferr[j] = 0.;
-           berr[j] = 0.;
-       }
-       return;
-    }
-    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-    colequ = lsame_(equed, "C") || lsame_(equed, "B");
-    /* Allocate working space */
-    work = complexMalloc(2*A->nrow);
-    rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) );
-    iwork = intMalloc(A->nrow);
-    if ( !work || !rwork || !iwork ) 
-        ABORT("Malloc fails for work/rwork/iwork.");
-    if ( notran ) {
-       *(unsigned char *)transc = 'N';
-        transt = TRANS;
-    } else {
-       *(unsigned char *)transc = 'T';
-       transt = NOTRANS;
-    }
-    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
-    nz     = A->ncol + 1;
-    eps    = slamch_("Epsilon");
-    safmin = slamch_("Safe minimum");
-    safe1  = nz * safmin;
-    safe2  = safe1 / eps;
-    /* Compute the number of nonzeros in each row (or column) of A */
-    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
-    if ( notran ) {
-       for (k = 0; k < A->ncol; ++k)
-           for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
-               ++iwork[Astore->rowind[i]];
-    } else {
-       for (k = 0; k < A->ncol; ++k)
-           iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
-    }  
-    /* Copy one column of RHS B into Bjcol. */
-    Bjcol.Stype = B->Stype;
-    Bjcol.Dtype = B->Dtype;
-    Bjcol.Mtype = B->Mtype;
-    Bjcol.nrow  = B->nrow;
-    Bjcol.ncol  = 1;
-    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
-    Bjcol_store = Bjcol.Store;
-    Bjcol_store->lda = ldb;
-    Bjcol_store->nzval = work; /* address aliasing */
-    /* Do for each right hand side ... */
-    for (j = 0; j < nrhs; ++j) {
-       count = 0;
-       lstres = 3.;
-       Bptr = &Bmat[j*ldb];
-       Xptr = &Xmat[j*ldx];
-       while (1) { /* Loop until stopping criterion is satisfied. */
-           /* Compute residual R = B - op(A) * X,   
-              where op(A) = A, A**T, or A**H, depending on TRANS. */
-#ifdef _CRAY
-           CCOPY(&A->nrow, Bptr, &ione, work, &ione);
-           ccopy_(&A->nrow, Bptr, &ione, work, &ione);
-           sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione);
-           /* Compute componentwise relative backward error from formula 
-              max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
-              where abs(Z) is the componentwise absolute value of the matrix
-              or vector Z.  If the i-th component of the denominator is less
-              than SAFE2, then SAFE1 is added to the i-th component of the   
-              numerator and denominator before dividing. */
-           for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
-           /* Compute abs(op(A))*abs(X) + abs(B). */
-           if (notran) {
-               for (k = 0; k < A->ncol; ++k) {
-                   xk = c_abs1( &Xptr[k] );
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                       rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
-               }
-           } else {
-               for (k = 0; k < A->ncol; ++k) {
-                   s = 0.;
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]);
-                   }
-                   rwork[k] += s;
-               }
-           }
-           s = 0.;
-           for (i = 0; i < A->nrow; ++i) {
-               if (rwork[i] > safe2)
-                   s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] );
-               else
-                   s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / 
-                               (rwork[i] + safe1) );
-           }
-           berr[j] = s;
-           /* Test stopping criterion. Continue iterating if   
-              1) The residual BERR(J) is larger than machine epsilon, and   
-              2) BERR(J) decreased by at least a factor of 2 during the   
-                 last iteration, and   
-              3) At most ITMAX iterations tried. */
-           if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
-               /* Update solution and try again. */
-               cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-#ifdef _CRAY
-               CAXPY(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-               caxpy_(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-               lstres = berr[j];
-               ++count;
-           } else {
-               break;
-           }
-       } /* end while */
-       stat->RefineSteps = count;
-       /* Bound error from formula:
-          norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
-          ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
-          where   
-            norm(Z) is the magnitude of the largest component of Z   
-            inv(op(A)) is the inverse of op(A)   
-            abs(Z) is the componentwise absolute value of the matrix or
-              vector Z   
-            NZ is the maximum number of nonzeros in any row of A, plus 1   
-            EPS is machine epsilon   
-          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
-          is incremented by SAFE1 if the i-th component of   
-          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   
-          Use CLACON to estimate the infinity-norm of the matrix   
-             inv(op(A)) * diag(W),   
-          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
-       for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
-       /* Compute abs(op(A))*abs(X) + abs(B). */
-       if ( notran ) {
-           for (k = 0; k < A->ncol; ++k) {
-               xk = c_abs1( &Xptr[k] );
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                   rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
-           }
-       } else {
-           for (k = 0; k < A->ncol; ++k) {
-               s = 0.;
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   xk = c_abs1( &Xptr[irow] );
-                   s += c_abs1(&Aval[i]) * xk;
-               }
-               rwork[k] += s;
-           }
-       }
-       for (i = 0; i < A->nrow; ++i)
-           if (rwork[i] > safe2)
-               rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
-           else
-               rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
-       kase = 0;
-       do {
-           clacon_(&A->nrow, &work[A->nrow], work,
-                   &ferr[j], &kase);
-           if (kase == 0) break;
-           if (kase == 1) {
-               /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       cs_mult(&work[i], &work[i], C[i]);
-                   }
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->nrow; ++i) {
-                       cs_mult(&work[i], &work[i], R[i]);
-                    }
-               cgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               for (i = 0; i < A->nrow; ++i) {
-                   cs_mult(&work[i], &work[i], rwork[i]);
-               }
-           } else {
-               /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
-               for (i = 0; i < A->nrow; ++i) {
-                   cs_mult(&work[i], &work[i], rwork[i]);
-               }
-               cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       cs_mult(&work[i], &work[i], C[i]);
-                   }
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       cs_mult(&work[i], &work[i], R[i]);  
-                   }
-           }
-       } while ( kase != 0 );
-       /* Normalize error. */
-       lstres = 0.;
-       if ( notran && colequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) );
-       } else if ( !notran && rowequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) );
-       } else {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) );
-       }
-       if ( lstres != 0. )
-           ferr[j] /= lstres;
-    } /* for each RHS j ... */
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(rwork);
-    SUPERLU_FREE(iwork);
-    SUPERLU_FREE(Bjcol.Store);
-    return;
-} /* cgsrfs */
diff --git a/superlu/cgssv.c b/superlu/cgssv.c
deleted file mode 100644
index 04f62b8a..00000000
--- a/superlu/cgssv.c
+++ /dev/null
@@ -1,231 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_cdefs.h"
-cgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-      SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
-      SuperLUStat_t *stat, int *info )
- * Purpose
- * =======
- *
- * CGSSV solves the system of linear equations A*X=B, using the
- * LU factorization from CGSTRF. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *
- *      1.1. Permute the columns of A, forming A*Pc, where Pc
- *           is a permutation matrix. For more details of this step, 
- *           see sp_preorder.c.
- *
- *      1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
- *           by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      1.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the
- *      above algorithm to the transpose of A:
- *
- *      2.1. Permute columns of transpose(A) (rows of A),
- *           forming transpose(A)*Pc, where Pc is a permutation matrix. 
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
- *           determined by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      2.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- * 
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR; Dtype = SLU_C; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *         If A->Stype = SLU_NC, column permutation vector of size A->ncol
- *         which defines the permutation matrix Pc; perm_c[i] = j means 
- *         column i of A is in position j in A*Pc.
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- *         If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
- *            options->Fact = SamePattern_SameRowPerm, it is an input argument.
- *            On exit, perm_c may be overwritten by the product of the input
- *            perm_c and a permutation that postorders the elimination tree
- *            of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *            is already in postorder.
- *         Otherwise, it is an output argument.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined 
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->RowPerm = MY_PERMR or
- *            options->Fact = SamePattern_SameRowPerm, perm_r is an
- *            input argument.
- *         otherwise it is an output argument.
- *
- * L       (output) SuperMatrix*
- *         The factor L from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *         
- * U       (output) SuperMatrix*
- *        The factor U from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *         > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                so the solution could not be computed.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol.
- *   
- */
-    DNformat *Bstore;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int      lwork = 0, *etree, i;
-    /* Set default values for some parameters */
-    float   drop_tol = 0.;
-    int      panel_size;     /* panel size */
-    int      relax;          /* no of columns in a relaxed snodes */
-    int      permc_spec;
-    trans_t  trans = NOTRANS;
-    double   *utime;
-    double   t;        /* Temporary time */
-    /* Test the input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    if ( options->Fact != DOFACT ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-        (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-        A->Dtype != SLU_C || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-       B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
-       *info = -7;
-    if ( *info != 0 ) {
-       i = -(*info);
-       xerbla_("cgssv", &i);
-       return;
-    }
-    utime = stat->utime;
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       cCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       trans = TRANS;
-    } else {
-        if ( A->Stype == SLU_NC ) AA = A;
-    }
-    t = SuperLU_timer_();
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = NATURAL:  natural ordering 
-     *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-     *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-     *   permc_spec = COLAMD:   approximate minimum degree column ordering
-     *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-     */
-    permc_spec = options->ColPerm;
-    if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-      get_perm_c(permc_spec, AA, perm_c);
-    utime[COLPERM] = SuperLU_timer_() - t;
-    etree = intMalloc(A->ncol);
-    t = SuperLU_timer_();
-    sp_preorder(options, AA, perm_c, etree, &AC);
-    utime[ETREE] = SuperLU_timer_() - t;
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-    /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-         relax, panel_size, sp_ienv(3), sp_ienv(4));*/
-    t = SuperLU_timer_(); 
-    /* Compute the LU factorization of A. */
-    cgstrf(options, &AC, drop_tol, relax, panel_size,
-          etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
-    utime[FACT] = SuperLU_timer_() - t;
-    t = SuperLU_timer_();
-    if ( *info == 0 ) {
-        /* Solve the system A*X=B, overwriting B with X. */
-        cgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
-    }
-    utime[SOLVE] = SuperLU_timer_() - t;
-    SUPERLU_FREE (etree);
-    Destroy_CompCol_Permuted(&AC);
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-    }
diff --git a/superlu/cgssvx.c b/superlu/cgssvx.c
deleted file mode 100644
index 905e7c73..00000000
--- a/superlu/cgssvx.c
+++ /dev/null
@@ -1,627 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_cdefs.h"
-cgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-       int *etree, char *equed, float *R, float *C,
-       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
-       SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, 
-       float *rcond, float *ferr, float *berr, 
-       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
- * Purpose
- * =======
- *
- * CGSSVX solves the system of linear equations A*X=B or A'*X=B, using
- * the LU factorization from cgstrf(). Error bounds on the solution and
- * a condition estimate are also provided. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *  
- *      1.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A is
- *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
- *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
- *           = TRANS or CONJ).
- *
- *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
- *           matrix that usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the matrix A (after equilibration if options->Equil = YES)
- *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
- *
- *      1.4. Compute the reciprocal pivot growth factor.
- *
- *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form of 
- *           A is used to estimate the condition number of the matrix A. If
- *           the reciprocal of the condition number is less than machine
- *           precision, info = A->ncol+1 is returned as a warning, but the
- *           routine still goes on to solve for X and computes error bounds
- *           as described below.
- *
- *      1.6. The system of equations is solved for X using the factored form
- *           of A.
- *
- *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      1.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R)
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
- *      to the transpose of A:
- *
- *      2.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A' is
- *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
- *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
- *
- *      2.2. Permute columns of transpose(A) (rows of A), 
- *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
- *           usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the transpose(A) (after equilibration if 
- *           options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
- *           permutation Pr determined by partial pivoting.
- *
- *      2.4. Compute the reciprocal pivot growth factor.
- *
- *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form 
- *           of transpose(A) is used to estimate the condition number of the
- *           matrix A. If the reciprocal of the condition number
- *           is less than machine precision, info = A->nrow+1 is returned as
- *           a warning, but the routine still goes on to solve for X and
- *           computes error bounds as described below.
- *
- *      2.6. The system of equations is solved for X using the factored form
- *           of transpose(A).
- *
- *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      2.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input/output) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- *         On entry, If options->Fact = FACTORED and equed is not 'N', 
- *         then A must have been equilibrated by the scaling factors in
- *         R and/or C.  
- *         On exit, A is not modified if options->Equil = NO, or if 
- *         options->Equil = YES but equed = 'N' on exit.
- *         Otherwise, if options->Equil = YES and equed is not 'N',
- *         A is scaled as follows:
- *         If A->Stype = SLU_NC:
- *           equed = 'R':  A := diag(R) * A
- *           equed = 'C':  A := A * diag(C)
- *           equed = 'B':  A := diag(R) * A * diag(C).
- *         If A->Stype = SLU_NR:
- *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
- *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
- *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
- *
- * perm_c  (input/output) int*
- *        If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
- *         which defines the permutation matrix Pc; perm_c[i] = j means
- *         column i of A is in position j in A*Pc.
- *         On exit, perm_c may be overwritten by the product of the input
- *         perm_c and a permutation that postorders the elimination tree
- *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *         is already in postorder.
- *
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *         will try to use the input perm_r, unless a certain threshold
- *         criterion is violated. In that case, perm_r is overwritten by a
- *         new permutation determined by partial pivoting or diagonal
- *         threshold pivoting.
- *         Otherwise, perm_r is output argument.
- * 
- * etree   (input/output) int*,  dimension (A->ncol)
- *         Elimination tree of Pc'*A'*A*Pc.
- *         If options->Fact != FACTORED and options->Fact != DOFACT,
- *         etree is an input argument, otherwise it is an output argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * equed   (input/output) char*
- *         Specifies the form of equilibration that was done.
- *         = 'N': No equilibration.
- *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
- *         = 'B': Both row and column equilibration, i.e., A was replaced 
- *                by diag(R)*A*diag(C).
- *         If options->Fact = FACTORED, equed is an input argument,
- *         otherwise it is an output argument.
- *
- * R       (input/output) float*, dimension (A->nrow)
- *         The row scale factors for A or transpose(A).
- *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
- *         If equed = 'N' or 'C', R is not accessed.
- *         If options->Fact = FACTORED, R is an input argument,
- *             otherwise, R is output.
- *         If options->zFact = FACTORED and equed = 'R' or 'B', each element
- *             of R must be positive.
- * 
- * C       (input/output) float*, dimension (A->ncol)
- *         The column scale factors for A or transpose(A).
- *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
- *         If equed = 'N' or 'R', C is not accessed.
- *         If options->Fact = FACTORED, C is an input argument,
- *             otherwise, C is output.
- *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
- *             of C must be positive.
- *         
- * L       (output) SuperMatrix*
- *        The factor L from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype SLU_= NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U       (output) SuperMatrix*
- *        The factor U from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * work    (workspace/output) void*, size (lwork) (in bytes)
- *         User supplied workspace, should be large enough
- *         to hold data structures for factors L and U.
- *         On exit, if fact is not 'F', L and U point to this array.
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               mem_usage->total_needed; no other side effects.
- *
- *         See argument 'mem_usage' for memory usage statistics.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         If B->ncol = 0, only LU decomposition is performed, the triangular
- *                         solve is skipped.
- *         On exit,
- *            if equed = 'N', B is not modified; otherwise
- *            if A->Stype = SLU_NC:
- *               if options->Trans = NOTRANS and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
- *                  B is overwritten by diag(C)*B;
- *            if A->Stype = SLU_NR:
- *               if options->Trans = NOTRANS and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
- *                  B is overwritten by diag(R)*B.
- *
- * X       (output) SuperMatrix*
- *         X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. 
- *         If info = 0 or info = A->ncol+1, X contains the solution matrix
- *         to the original system of equations. Note that A and B are modified
- *         on exit if equed is not 'N', and the solution to the equilibrated
- *         system is inv(diag(C))*X if options->Trans = NOTRANS and
- *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
- *         and equed = 'R' or 'B'.
- *
- * recip_pivot_growth (output) float*
- *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
- *         The infinity norm is used. If recip_pivot_growth is much less
- *         than 1, the stability of the LU factorization could be poor.
- *
- * rcond   (output) float*
- *         The estimate of the reciprocal condition number of the matrix A
- *         after equilibration (if done). If rcond is less than the machine
- *         precision (in particular, if rcond = 0), the matrix is singular
- *         to working precision. This condition is indicated by a return
- *         code of info > 0.
- *
- * FERR    (output) float*, dimension (B->ncol)   
- *         The estimated forward error bound for each solution vector   
- *         X(j) (the j-th column of the solution matrix X).   
- *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *         is an estimated upper bound for the magnitude of the largest 
- *         element in (X(j) - XTRUE) divided by the magnitude of the   
- *         largest element in X(j).  The estimate is as reliable as   
- *         the estimate for RCOND, and is almost always a slight   
- *         overestimate of the true error.
- *         If options->IterRefine = NOREFINE, ferr = 1.0.
- *
- * BERR    (output) float*, dimension (B->ncol)
- *         The componentwise relative backward error of each solution   
- *         vector X(j) (i.e., the smallest relative change in   
- *         any element of A or B that makes X(j) an exact solution).
- *         If options->IterRefine = NOREFINE, berr = 1.0.
- *
- * mem_usage (output) mem_usage_t*
- *         Record the memory usage statistics, consisting of following fields:
- *         - for_lu (float)
- *           The amount of space used in bytes for L\U data structures.
- *         - total_needed (float)
- *           The amount of space needed in bytes to perform factorization.
- *         - expansions (int)
- *           The number of memory expansions during the LU factorization.
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *         = 0: successful exit   
- *         < 0: if info = -i, the i-th argument had an illegal value   
- *         > 0: if info = i, and i is   
- *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
- *                    been completed, but the factor U is exactly   
- *                    singular, so the solution and error bounds   
- *                    could not be computed.   
- *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
- *                    precision, meaning that the matrix is singular to
- *                    working precision. Nevertheless, the solution and
- *                    error bounds are computed because there are a number
- *                    of situations where the computed solution can be more
- *                    accurate than the value of RCOND would suggest.   
- *              > A->ncol+1: number of bytes allocated when memory allocation
- *                    failure occurred, plus A->ncol.
- *
- */
-    DNformat  *Bstore, *Xstore;
-    complex    *Bmat, *Xmat;
-    int       ldb, ldx, nrhs;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int       colequ, equil, nofact, notran, rowequ, permc_spec;
-    trans_t   trant;
-    char      norm[1];
-    int       i, j, info1;
-    float    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
-    int       relax, panel_size;
-    float    diag_pivot_thresh, drop_tol;
-    double    t0;      /* temporary time */
-    double    *utime;
-    /* External functions */
-    extern float clangs(char *, SuperMatrix *);
-    extern double slamch_(char *);
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    *info = 0;
-    nofact = (options->Fact != FACTORED);
-    equil = (options->Equil == YES);
-    notran = (options->Trans == NOTRANS);
-    if ( nofact ) {
-       *(unsigned char *)equed = 'N';
-       rowequ = FALSE;
-       colequ = FALSE;
-    } else {
-       rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-       colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       smlnum = slamch_("Safe minimum");
-       bignum = 1. / smlnum;
-    }
-#if 0
-printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
-       options->Fact, options->Trans, *equed);
-    /* Test the input parameters */
-    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
-       options->Fact != SamePattern_SameRowPerm &&
-       !notran && options->Trans != TRANS && options->Trans != CONJ &&
-       !equil && options->Equil != NO)
-       *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-             A->Dtype != SLU_C || A->Mtype != SLU_GE )
-       *info = -2;
-    else if (options->Fact == FACTORED &&
-            !(rowequ || colequ || lsame_(equed, "N")))
-       *info = -6;
-    else {
-       if (rowequ) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, R[j]);
-               rcmax = SUPERLU_MAX(rcmax, R[j]);
-           }
-           if (rcmin <= 0.) *info = -7;
-           else if ( A->nrow > 0)
-               rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else rowcnd = 1.;
-       }
-       if (colequ && *info == 0) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, C[j]);
-               rcmax = SUPERLU_MAX(rcmax, C[j]);
-           }
-           if (rcmin <= 0.) *info = -8;
-           else if (A->nrow > 0)
-               colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else colcnd = 1.;
-       }
-       if (*info == 0) {
-           if ( lwork < -1 ) *info = -12;
-           else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     B->Stype != SLU_DN || B->Dtype != SLU_C || 
-                     B->Mtype != SLU_GE )
-               *info = -13;
-           else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     (B->ncol != 0 && B->ncol != X->ncol) ||
-                      X->Stype != SLU_DN ||
-                     X->Dtype != SLU_C || X->Mtype != SLU_GE )
-               *info = -14;
-       }
-    }
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("cgssvx", &i);
-       return;
-    }
-    /* Initialization for factor parameters */
-    panel_size = sp_ienv(1);
-    relax      = sp_ienv(2);
-    diag_pivot_thresh = options->DiagPivotThresh;
-    drop_tol   = 0.0;
-    utime = stat->utime;
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       cCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       if ( notran ) { /* Reverse the transpose argument. */
-           trant = TRANS;
-           notran = 0;
-       } else {
-           trant = NOTRANS;
-           notran = 1;
-       }
-    } else { /* A->Stype == SLU_NC */
-       trant = options->Trans;
-       AA = A;
-    }
-    if ( nofact && equil ) {
-       t0 = SuperLU_timer_();
-       /* Compute row and column scalings to equilibrate the matrix A. */
-       cgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
-       if ( info1 == 0 ) {
-           /* Equilibrate matrix A. */
-           claqgs(AA, R, C, rowcnd, colcnd, amax, equed);
-           rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-           colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       }
-       utime[EQUIL] = SuperLU_timer_() - t0;
-    }
-    if ( nrhs > 0 ) {
-        /* Scale the right hand side if equilibration was performed. */
-        if ( notran ) {
-           if ( rowequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
-                   }
-           }
-        } else if ( colequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                    cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
-               }
-        }
-    }
-    if ( nofact ) {
-        t0 = SuperLU_timer_();
-       /*
-        * Gnet column permutation vector perm_c[], according to permc_spec:
-        *   permc_spec = NATURAL:  natural ordering 
-        *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-        *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-        *   permc_spec = COLAMD:   approximate minimum degree column ordering
-        *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-        */
-       permc_spec = options->ColPerm;
-       if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-            get_perm_c(permc_spec, AA, perm_c);
-       utime[COLPERM] = SuperLU_timer_() - t0;
-       t0 = SuperLU_timer_();
-       sp_preorder(options, AA, perm_c, etree, &AC);
-       utime[ETREE] = SuperLU_timer_() - t0;
-/*     printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-              relax, panel_size, sp_ienv(3), sp_ienv(4));
-       fflush(stdout); */
-       /* Compute the LU factorization of A*Pc. */
-       t0 = SuperLU_timer_();
-       cgstrf(options, &AC, drop_tol, relax, panel_size,
-              etree, work, lwork, perm_c, perm_r, L, U, stat, info);
-       utime[FACT] = SuperLU_timer_() - t0;
-       if ( lwork == -1 ) {
-           mem_usage->total_needed = *info - A->ncol;
-           return;
-       }
-    }
-    if ( options->PivotGrowth ) {
-        if ( *info > 0 ) {
-           if ( *info <= A->ncol ) {
-               /* Compute the reciprocal pivot growth factor of the leading
-                  rank-deficient *info columns of A. */
-               *recip_pivot_growth = cPivotGrowth(*info, AA, perm_c, L, U);
-           }
-           return;
-        }
-        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
-        *recip_pivot_growth = cPivotGrowth(A->ncol, AA, perm_c, L, U);
-    }
-    if ( options->ConditionNumber ) {
-      if (*info == 0) {
-        /* Estimate the reciprocal of the condition number of A. */
-        t0 = SuperLU_timer_();
-        if ( notran ) {
-           *(unsigned char *)norm = '1';
-        } else {
-           *(unsigned char *)norm = 'I';
-        }
-        anorm = clangs(norm, AA);
-        cgscon(norm, L, U, anorm, rcond, stat, info);
-        utime[RCOND] = SuperLU_timer_() - t0;
-      } else *rcond = 0;
-    }
-    if ( *info == 0 && nrhs > 0 ) {
-        /* Compute the solution matrix X. */
-        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
-            for (i = 0; i < B->nrow; i++)
-               Xmat[i + j*ldx] = Bmat[i + j*ldb];
-        t0 = SuperLU_timer_();
-        cgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
-        utime[SOLVE] = SuperLU_timer_() - t0;
-        /* Use iterative refinement to improve the computed solution and 
-           error bounds and backward error estimates for it. */
-        t0 = SuperLU_timer_();
-        if ( options->IterRefine != NOREFINE ) {
-            cgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
-                   X, ferr, berr, stat, info);
-        } else {
-            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
-        }
-        utime[REFINE] = SuperLU_timer_() - t0;
-        /* Transform the solution matrix X to a solution of the original system. */
-        if ( notran ) {
-           if ( colequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]);
-                   }
-           }
-        } else if ( rowequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                    cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]);
-                }
-        }
-    } /* end if nrhs > 0 */
-    if ( *info == 0 && options->ConditionNumber ) {
-        /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
-        if ( *rcond < slamch_("E") ) *info = A->ncol + 1;
-    }
-    if ( *info != -10000000 && nofact ) {
-        cQuerySpace(L, U, mem_usage);
-        Destroy_CompCol_Permuted(&AC);
-    }
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-    }
diff --git a/superlu/cgstrf.c b/superlu/cgstrf.c
deleted file mode 100644
index bd3cee09..00000000
--- a/superlu/cgstrf.c
+++ /dev/null
@@ -1,444 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_cdefs.h"
-extern void countnz();
-extern void fixupL();
-cgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
-        int relax, int panel_size, int *etree, void *work, int lwork,
-        int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
-        SuperLUStat_t *stat, int *info)
- * Purpose
- * =======
- *
- * CGSTRF computes an LU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
- *
- * drop_tol (input) float (NOT IMPLEMENTED)
- *         Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
- * relax    (input) int
- *          To control degree of relaxing supernodes. If the number
- *          of nodes (columns) in a subtree of the elimination tree is less
- *          than relax, this subtree is considered as one supernode,
- *          regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *          A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *          Elimination tree of A'*A.
- *          Note: etree is a vector of parent pointers for a forest whose
- *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *          On input, the columns of A should be permuted so that the
- *          etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *          User-supplied work space and space for the output data structures.
- *          Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *         Column permutation vector, which defines the 
- *          permutation matrix Pc; perm_c[i] = j means column i of A is 
- *          in position j in A*Pc.
- *          When searching for diagonal, perm_c[*] is applied to the
- *          row subscripts of A, so that diagonal threshold pivoting
- *          can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *          Row permutation vector which defines the permutation matrix Pr,
- *          perm_r[i] = j means row i of A is in position j in Pr*A.
- *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *             will try to use the input perm_r, unless a certain threshold
- *             criterion is violated. In that case, perm_r is overwritten by
- *             a new permutation determined by partial pivoting or diagonal
- *             threshold pivoting.
- *          Otherwise, perm_r is output argument;
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = SLU_NC, 
- *          Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *          = 0: successful exit
- *          < 0: if info = -i, the i-th argument had an illegal value
- *          > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                and division by zero will occur if it is used to solve a
- *                system of equations.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol. If lwork = -1, it is
- *                the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays: 
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   xprune[0:n-1]: xprune[*] points to locations in subscript 
- *     vector lsub[*]. For column i, xprune[i] denotes the point where 
- *     structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
- *     to be traversed for symbolic factorization.
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been 
- *     reached when working on column j.
- *     Storage: relative to original row subscripts
- *     NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
- *           see cpanel_dfs.c; marker2 is used for inner-factorization,
- *            see ccolumn_dfs.c.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *      Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
- *     unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *     in topological order of the dfs. A supernode representative is the 
- *     last column of a supernode.
- *      The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
- *     supernodal representative r, repfnz[r] is the location of the first 
- *     nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *     indicates the supernode r has been explored.
- *     NOTE: There are W of them, each used for one column of a panel. 
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
- *      the panel diagonal. These are filled in during cpanel_dfs(), and are
- *      used later in the inner LU factorization within the panel.
- *     panel_lsub[]/dense[] pair forms the SPA data structure.
- *     NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *                NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *     The size of this array is defined by NUM_TEMPV() in csp_defs.h.
- *
- */
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when 
-                                  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *iwork;
-    complex    *cwork;
-    int              *segrep, *repfnz, *parent, *xplore;
-    int              *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
-    int              *xprune;
-    int              *marker;
-    complex    *dense, *tempv;
-    int       *relax_end;
-    complex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    int       pivrow;   /* pivotal row number in the original matrix A */
-    int       nseg1;   /* no of segments in U-column above panel row jcol */
-    int       nseg;    /* no of segments in each U-column */
-    register int jcol; 
-    register int kcol; /* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;   /* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-    iinfo    = 0;
-    m        = A->nrow;
-    n        = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a        = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-    /* Allocate storage common to the factor routines */
-    *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz,
-                       panel_size, L, U, &Glu, &iwork, &cwork);
-    if ( *info ) return;
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-            &repfnz, &panel_lsub, &xprune, &marker);
-    cSetRWork(m, panel_size, cwork, &dense, &tempv);
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-       /* Compute the inverse of perm_r */
-       iperm_r = (int *) intMalloc(m);
-       for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-       iperm_r_allocated = 1;
-    }
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES ) {
-        heap_relax_snode(n, etree, relax, marker, relax_end); 
-    } else {
-        relax_snode(n, etree, relax, marker, relax_end); 
-    }
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-    /* 
-     * Work on one "panel" at a time. A panel is one of the following: 
-     *    (a) a relaxed supernode at the bottom of the etree, or
-     *    (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-      if (handle_getfem_callback() != 0) {
-       iinfo = *info = -333333333; goto HOUSTON_WE_HAVE_A_PROBLEM; 
-       break;
-      }
-       if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-           kcol = relax_end[jcol];       /* end of the relaxed snode */
-           panel_histo[kcol-jcol+1]++;
-           /* --------------------------------------
-            * Factorize the relaxed supernode(jcol:kcol) 
-            * -------------------------------------- */
-           /* Determine the union of the row structure of the snode */
-           if ( (*info = csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-                                   xprune, marker, &Glu)) != 0 )
-               return;
-            nextu    = xusub[jcol];
-           nextlu   = xlusup[jcol];
-           jsupno   = supno[jcol];
-           fsupc    = xsup[jsupno];
-           new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-           nzlumax = Glu.nzlumax;
-           while ( new_next > nzlumax ) {
-               if ( (*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) 
-                   return;
-           }
-           for (icol = jcol; icol<= kcol; icol++) {
-               xusub[icol+1] = nextu;
-               /* Scatter into SPA dense[*] */
-               for (k = xa_begin[icol]; k < xa_end[icol]; k++)
-                   dense[asub[k]] = a[k];
-               /* Numeric update within the snode */
-               csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-               if ( (*info = cpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-#ifdef DEBUG
-               cprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
-           }
-           jcol = icol;
-       } else { /* Work on one panel of panel_size columns */
-           /* Adjust panel_size so that a panel won't overlap with the next 
-            * relaxed snode.
-            */
-           panel_size = w_def;
-           for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
-               if ( relax_end[k] != EMPTY ) {
-                   panel_size = k - jcol;
-                   break;
-               }
-           if ( k == min_mn ) panel_size = min_mn - jcol;
-           panel_histo[panel_size]++;
-           /* symbolic factor on a panel of columns */
-           cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                     dense, panel_lsub, segrep, repfnz, xprune,
-                     marker, parent, xplore, &Glu);
-           /* numeric sup-panel updates in topological order */
-           cpanel_bmod(m, panel_size, jcol, nseg1, dense,
-                       tempv, segrep, repfnz, &Glu, stat);
-           /* Sparse LU within the panel, and below panel diagonal */
-           for ( jj = jcol; jj < jcol + panel_size; jj++) {
-               k = (jj - jcol) * m; /* column index for w-wide arrays */
-               nseg = nseg1;   /* Begin after all the panel segments */
-               if ((*info = ccolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
-                                       segrep, &repfnz[k], xprune, marker,
-                                       parent, xplore, &Glu)) != 0) 
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-               /* Numeric updates */
-               if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
-                                        tempv, &segrep[nseg1], &repfnz[k],
-                                        jcol, &Glu, stat)) != 0) 
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-               /* Copy the U-segments to ucol[*] */
-               if ((*info = ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-                                         perm_r, &dense[k], &Glu)) != 0)
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-               if ( (*info = cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-               /* Prune columns (0:jj-1) using column jj */
-               cpruneL(jj, perm_r, pivrow, nseg, segrep,
-                        &repfnz[k], xprune, &Glu);
-               /* Reset repfnz[] for this column */
-               resetrep_col (nseg, segrep, &repfnz[k]);
-#ifdef DEBUG
-               cprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
-           }
-           jcol += panel_size; /* Move to the next panel */
-       } /* else */
-    } /* for */
-    *info = iinfo;
-    if ( m > n ) {
-       k = 0;
-        for (i = 0; i < m; ++i) 
-            if ( perm_r[i] == EMPTY ) {
-               perm_r[i] = n + k;
-               ++k;
-           }
-    }
-    if (*info == 0) {
-      countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
-      fixupL(min_mn, perm_r, &Glu);
-    }
-    cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */
-    if ( fact == SamePattern_SameRowPerm ) {
-        /* L and U structures may have changed due to possibly different
-          pivoting, even though the storage is available.
-          There could also be memory expansions, so the array locations
-           may have changed, */
-        ((SCformat *)L->Store)->nnz = nnzL;
-       ((SCformat *)L->Store)->nsuper = Glu.supno[n];
-       ((SCformat *)L->Store)->nzval = Glu.lusup;
-       ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-       ((SCformat *)L->Store)->rowind = Glu.lsub;
-       ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-       ((NCformat *)U->Store)->nnz = nnzU;
-       ((NCformat *)U->Store)->nzval = Glu.ucol;
-       ((NCformat *)U->Store)->rowind = Glu.usub;
-       ((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-        cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, 
-                                Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-                                Glu.xsup, SLU_SC, SLU_C, SLU_TRLU);
-       cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
-                              Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU);
-    }
-    ops[FACT] += ops[TRSV] + ops[GEMV];        
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
diff --git a/superlu/cgstrs.c b/superlu/cgstrs.c
deleted file mode 100644
index b2d19970..00000000
--- a/superlu/cgstrs.c
+++ /dev/null
@@ -1,344 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_cdefs.h"
-extern void ctrsm_();
-extern void cgemm_();
- * Function prototypes 
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-cgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
- * Purpose
- * =======
- *
- * CGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         cgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         cgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *        Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-    int      incx = 1, incy = 1;
-    complex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    complex   *work_col;
-    complex   temp_comp;
-    DNformat *Bstore;
-    complex   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    complex   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    complex   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void cprint_soln();
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
-       *info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
-       *info = -6;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("cgstrs", &i);
-       return;
-    }
-    n = L->nrow;
-    work = complexCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = complexMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    if ( trans == NOTRANS ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-           solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 8 * nrow * nsupc * nrhs;
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       cc_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
-                       c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               ctrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               cgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       c_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
-                       work_col[i].r = 0.0;
-                       work_col[i].i = 0.0;
-                       iptr++;
-                   }
-               }
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   clsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   cmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       c_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
-                       work[i].r = 0.;
-                       work[i].i = 0.;
-                       iptr++;
-                   }
-               }
-           } /* else ... */
-       } /* for L-solve */
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       cprint_soln(n, nrhs, Bmat);
-       /*
-        * Back solve Ux=y.
-        */
-       for (k = Lstore->nsuper; k >= 0; k--) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           luptr = L_NZ_START(fsupc);
-           solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;
-           if ( nsupc == 1 ) {
-               rhs_work = &Bmat[0];
-               for (j = 0; j < nrhs; j++) {
-                   c_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
-                   rhs_work += ldb;
-               }
-           } else {
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("U", strlen("U"));
-               ftcs3 = _cptofcd("N", strlen("N"));
-               CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               ctrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               for (j = 0; j < nrhs; j++)
-                   cusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-           }
-           for (j = 0; j < nrhs; ++j) {
-               rhs_work = &Bmat[j*ldb];
-               for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-                       irow = U_SUB(i);
-                       cc_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
-                       c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-                   }
-               }
-           }
-       } /* for U-solve */
-#ifdef DEBUG
-       printf("After U-solve: x=\n");
-       cprint_soln(n, nrhs, Bmat);
-       /* Compute the final solution X := Pc*X. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-        stat->ops[SOLVE] = solve_ops;
-    } else { /* Solve A'*X=B or CONJ(A)*X=B */
-       /* Permute right hand sides to form Pc'*B. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       stat->ops[SOLVE] = 0;
-        if (trans == TRANS) {
-           for (k = 0; k < nrhs; ++k) {
-               /* Multiply by inv(U'). */
-               sp_ctrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-               /* Multiply by inv(L'). */
-               sp_ctrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-           }
-         } else { /* trans == CONJ */
-            for (k = 0; k < nrhs; ++k) {                
-                /* Multiply by conj(inv(U')). */
-                sp_ctrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info);
-                /* Multiply by conj(inv(L')). */
-                sp_ctrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info);
-           }
-         }
-       /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-    }
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
- * Diagnostic print of the solution vector 
- */
-cprint_soln(int n, int nrhs, complex *soln)
-    int i;
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i].r);
diff --git a/superlu/clacon.c b/superlu/clacon.c
deleted file mode 100644
index 6e332f67..00000000
--- a/superlu/clacon.c
+++ /dev/null
@@ -1,236 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <math.h>
-#include "slu_Cnames.h"
-#include "slu_scomplex.h"
-extern void ccopy_();
-clacon_(int *n, complex *v, complex *x, float *est, int *kase)
-    Purpose   
-    =======   
-    CLACON estimates the 1-norm of a square matrix A.   
-    Reverse communication is used for evaluating matrix-vector products. 
-    Arguments   
-    =========   
-    N      (input) INT
-           The order of the matrix.  N >= 1.   
-    V      (workspace) COMPLEX PRECISION array, dimension (N)   
-           On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
-           (W is not returned).   
-    X      (input/output) COMPLEX PRECISION array, dimension (N)   
-           On an intermediate return, X should be overwritten by   
-                 A * X,   if KASE=1,   
-                 A' * X,  if KASE=2,
-           where A' is the conjugate transpose of A,
-           and CLACON must be re-called with all the other parameters   
-           unchanged.   
-    EST    (output) FLOAT PRECISION   
-           An estimate (a lower bound) for norm(A).   
-    KASE   (input/output) INT
-           On the initial call to CLACON, KASE should be 0.   
-           On an intermediate return, KASE will be 1 or 2, indicating   
-           whether X should be overwritten by A * X  or A' * X.   
-           On the final return from CLACON, KASE will again be 0.   
-    Further Details   
-    ======= =======   
-    Contributed by Nick Higham, University of Manchester.   
-    Originally named CONEST, dated March 16, 1988.   
-    Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
-    a real or complex matrix, with applications to condition estimation", 
-    ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
-    ===================================================================== 
-    /* Table of constant values */
-    int c__1 = 1;
-    complex      zero = {0.0, 0.0};
-    complex      one = {1.0, 0.0};
-    /* System generated locals */
-    float d__1;
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static float altsgn, estold;
-    static int i, j;
-    float temp;
-    float safmin;
-    extern double slamch_(char *);
-    extern int icmax1_(int *, complex *, int *);
-    extern double scsum1_(int *, complex *, int *);
-    safmin = slamch_("Safe minimum");
-    if ( *kase == 0 ) {
-       for (i = 0; i < *n; ++i) {
-           x[i].r = 1. / (float) (*n);
-           x[i].i = 0.;
-       }
-       *kase = 1;
-       jump = 1;
-       return 0;
-    }
-    switch (jump) {
-       case 1:  goto L20;
-       case 2:  goto L40;
-       case 3:  goto L70;
-       case 4:  goto L110;
-       case 5:  goto L140;
-    }
-    /*     ................ ENTRY   (JUMP = 1)   
-  L20:
-    if (*n == 1) {
-       v[0] = x[0];
-       *est = c_abs(&v[0]);
-       /*        ... QUIT */
-       goto L150;
-    }
-    *est = scsum1_(n, x, &c__1);
-    for (i = 0; i < *n; ++i) {
-       d__1 = c_abs(&x[i]);
-       if (d__1 > safmin) {
-           d__1 = 1 / d__1;
-           x[i].r *= d__1;
-           x[i].i *= d__1;
-       } else {
-           x[i] = one;
-       }
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 2)   
-    j = icmax1_(n, &x[0], &c__1);
-    --j;
-    iter = 2;
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 3)   
-#ifdef _CRAY
-    CCOPY(n, x, &c__1, v, &c__1);
-    ccopy_(n, x, &c__1, v, &c__1);
-    estold = *est;
-    *est = scsum1_(n, v, &c__1);
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-    for (i = 0; i < *n; ++i) {
-       d__1 = c_abs(&x[i]);
-       if (d__1 > safmin) {
-           d__1 = 1 / d__1;
-           x[i].r *= d__1;
-           x[i].i *= d__1;
-       } else {
-           x[i] = one;
-       }
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 4)   
-    jlast = j;
-    j = icmax1_(n, &x[0], &c__1);
-    --j;
-    if (x[jlast].r != (d__1 = x[j].r, fabs(d__1)) && iter < 5) {
-       ++iter;
-       goto L50;
-    }
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-       x[i-1].r = altsgn * ((float)(i - 1) / (float)(*n - 1) + 1.);
-       x[i-1].i = 0.;
-       altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 5)   
-    temp = scsum1_(n, x, &c__1) / (float)(*n * 3) * 2.;
-    if (temp > *est) {
-#ifdef _CRAY
-       CCOPY(n, &x[0], &c__1, &v[0], &c__1);
-       ccopy_(n, &x[0], &c__1, &v[0], &c__1);
-       *est = temp;
-    }
-    *kase = 0;
-    return 0;
-} /* clacon_ */
diff --git a/superlu/clangs.c b/superlu/clangs.c
deleted file mode 100644
index f24c3808..00000000
--- a/superlu/clangs.c
+++ /dev/null
@@ -1,132 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  clangs.c
- * History:     Modified from lapack routine CLANGE
- */
-#include <math.h>
-#include "slu_cdefs.h"
-float clangs(char *norm, SuperMatrix *A)
-    Purpose   
-    =======   
-    CLANGS returns the value of the one norm, or the Frobenius norm, or 
-    the infinity norm, or the element of largest absolute value of a 
-    real matrix A.   
-    Description   
-    ===========   
-    CLANGE returns the value   
-       CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
-                (   
-                ( norm1(A),         NORM = '1', 'O' or 'o'   
-                (   
-                ( normI(A),         NORM = 'I' or 'i'   
-                (   
-                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
-    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
-    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
-    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
-    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
-    Arguments   
-    =========   
-    NORM    (input) CHARACTER*1   
-            Specifies the value to be returned in CLANGE as described above.   
-    A       (input) SuperMatrix*
-            The M by N sparse matrix A. 
-   ===================================================================== 
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    int      i, j, irow;
-    float   value, sum;
-    float   *rwork;
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
-       value = 0.;
-    } else if (lsame_(norm, "M")) {
-       /* Find max(abs(A(i,j))). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
-               value = SUPERLU_MAX( value, c_abs( &Aval[i]) );
-    } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
-       /* Find norm1(A). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j) {
-           sum = 0.;
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
-               sum += c_abs( &Aval[i] );
-           value = SUPERLU_MAX(value,sum);
-       }
-    } else if (lsame_(norm, "I")) {
-       /* Find normI(A). */
-       if ( !(rwork = (float *) SUPERLU_MALLOC(A->nrow * sizeof(float))) )
-           ABORT("SUPERLU_MALLOC fails for rwork.");
-       for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
-               irow = Astore->rowind[i];
-               rwork[irow] += c_abs( &Aval[i] );
-           }
-       value = 0.;
-       for (i = 0; i < A->nrow; ++i)
-           value = SUPERLU_MAX(value, rwork[i]);
-       SUPERLU_FREE (rwork);
-    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
-       /* Find normF(A). */
-       ABORT("Not implemented.");
-    } else
-       ABORT("Illegal norm specified.");
-    return (value);
-} /* clangs */
diff --git a/superlu/claqgs.c b/superlu/claqgs.c
deleted file mode 100644
index cfecc376..00000000
--- a/superlu/claqgs.c
+++ /dev/null
@@ -1,160 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  claqgs.c
- * History:     Modified from LAPACK routine CLAQGE
- */
-#include <math.h>
-#include "slu_cdefs.h"
-claqgs(SuperMatrix *A, float *r, float *c, 
-       float rowcnd, float colcnd, float amax, char *equed)
-    Purpose   
-    =======   
-    CLAQGS equilibrates a general sparse M by N matrix A using the row and   
-    scaling factors in the vectors R and C.   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    A       (input/output) SuperMatrix*
-            On exit, the equilibrated matrix.  See EQUED for the form of 
-            the equilibrated matrix. The type of A can be:
-           Stype = NC; Dtype = SLU_C; Mtype = GE.
-    R       (input) float*, dimension (A->nrow)
-            The row scale factors for A.
-    C       (input) float*, dimension (A->ncol)
-            The column scale factors for A.
-    ROWCND  (input) float
-            Ratio of the smallest R(i) to the largest R(i).
-    COLCND  (input) float
-            Ratio of the smallest C(i) to the largest C(i).
-    AMAX    (input) float
-            Absolute value of largest matrix entry.
-    EQUED   (output) char*
-            Specifies the form of equilibration that was done.   
-            = 'N':  No equilibration   
-            = 'R':  Row equilibration, i.e., A has been premultiplied by  
-                    diag(R).   
-            = 'C':  Column equilibration, i.e., A has been postmultiplied  
-                    by diag(C).   
-            = 'B':  Both row and column equilibration, i.e., A has been
-                    replaced by diag(R) * A * diag(C).   
-    Internal Parameters   
-    ===================   
-    THRESH is a threshold value used to decide if row or column scaling   
-    should be done based on the ratio of the row or column scaling   
-    factors.  If ROWCND < THRESH, row scaling is done, and if   
-    COLCND < THRESH, column scaling is done.   
-    LARGE and SMALL are threshold values used to decide if row scaling   
-    should be done based on the absolute size of the largest matrix   
-    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
-    ===================================================================== 
-#define THRESH    (0.1)
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    int i, j, irow;
-    float large, small, cj;
-    extern double slamch_(char *);
-    float temp;
-    /* Quick return if possible */
-    if (A->nrow <= 0 || A->ncol <= 0) {
-       *(unsigned char *)equed = 'N';
-       return;
-    }
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Initialize LARGE and SMALL. */
-    small = slamch_("Safe minimum") / slamch_("Precision");
-    large = 1. / small;
-    if (rowcnd >= THRESH && amax >= small && amax <= large) {
-       if (colcnd >= THRESH)
-           *(unsigned char *)equed = 'N';
-       else {
-           /* Column scaling */
-           for (j = 0; j < A->ncol; ++j) {
-               cj = c[j];
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   cs_mult(&Aval[i], &Aval[i], cj);
-                }
-           }
-           *(unsigned char *)equed = 'C';
-       }
-    } else if (colcnd >= THRESH) {
-       /* Row scaling, no column scaling */
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               cs_mult(&Aval[i], &Aval[i], r[irow]);
-           }
-       *(unsigned char *)equed = 'R';
-    } else {
-       /* Row and column scaling */
-       for (j = 0; j < A->ncol; ++j) {
-           cj = c[j];
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               temp = cj * r[irow];
-               cs_mult(&Aval[i], &Aval[i], temp);
-           }
-       }
-       *(unsigned char *)equed = 'B';
-    }
-    return;
-} /* claqgs */
diff --git a/superlu/cmemory.c b/superlu/cmemory.c
deleted file mode 100644
index 3f364b46..00000000
--- a/superlu/cmemory.c
+++ /dev/null
@@ -1,691 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_cdefs.h"
-/* Constants */
-#define NO_MEMTYPE  4      /* 0: lusup;
-                             1: ucol;
-                             2: lsub;
-                             3: usub */
-#define GluIntArray(n)   (5 * (n) + 5)
-/* Internal prototypes */
-void  *cexpand (int *, MemType,int, int, GlobalLU_t *);
-int   cLUWorkInit (int, int, int, int **, complex **, LU_space_t);
-void  copy_mem_complex (int, void *, void *);
-void  cStackCompress (GlobalLU_t *);
-void  cSetupSpace (void *, int, LU_space_t *);
-void  *cuser_malloc (int, int);
-void  cuser_free (int, int);
-/* External prototypes (in memory.c - prec-indep) */
-extern void    copy_mem_int    (int, void *, void *);
-extern void    user_bcopy      (char *, char *, int);
-/* Headers for 4 types of dynamatically managed memory */
-typedef struct e_node {
-    int size;      /* length of the memory that has been used */
-    void *mem;     /* pointer to the new malloc'd store */
-} ExpHeader;
-typedef struct {
-    int  size;
-    int  used;
-    int  top1;  /* grow upward, relative to &array[0] */
-    int  top2;  /* grow downward */
-    void *array;
-} LU_stack_t;
-/* Variables local to this file */
-static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
-static LU_stack_t stack;
-static int no_expand;
-/* Macros to manipulate stack */
-#define StackFull(x)         ( x + stack.used >= stack.size )
-#define NotDoubleAlign(addr) ( (long int)addr & 7 )
-#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
-#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
-                             (w + 1) * m * sizeof(complex) )
-#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
- * Setup the memory model to be used for factorization.
- *    lwork = 0: use system malloc;
- *    lwork > 0: use user-supplied work[] space.
- */
-void cSetupSpace(void *work, int lwork, LU_space_t *MemModel)
-    if ( lwork == 0 ) {
-       *MemModel = SYSTEM; /* malloc/free */
-    } else if ( lwork > 0 ) {
-       *MemModel = USER;   /* user provided space */
-       stack.used = 0;
-       stack.top1 = 0;
-       stack.top2 = (lwork/4)*4; /* must be word addressable */
-       stack.size = stack.top2;
-       stack.array = (void *) work;
-    }
-void *cuser_malloc(int bytes, int which_end)
-    void *buf;
-    if ( StackFull(bytes) ) return (NULL);
-    if ( which_end == HEAD ) {
-       buf = (char*) stack.array + stack.top1;
-       stack.top1 += bytes;
-    } else {
-       stack.top2 -= bytes;
-       buf = (char*) stack.array + stack.top2;
-    }
-    stack.used += bytes;
-    return buf;
-void cuser_free(int bytes, int which_end)
-    if ( which_end == HEAD ) {
-       stack.top1 -= bytes;
-    } else {
-       stack.top2 += bytes;
-    }
-    stack.used -= bytes;
- * mem_usage consists of the following fields:
- *    - for_lu (float)
- *      The amount of space used in bytes for the L\U data structures.
- *    - total_needed (float)
- *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
- */
-int cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
-    SCformat *Lstore;
-    NCformat *Ustore;
-    register int n, iword, dword, panel_size = sp_ienv(1);
-    Lstore = L->Store;
-    Ustore = U->Store;
-    n = L->ncol;
-    iword = sizeof(int);
-    dword = sizeof(complex);
-    /* For LU factors */
-    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
-                                dword + Lstore->rowind_colptr[n] * iword );
-    mem_usage->for_lu += (float)( (n + 1) * iword +
-                                Ustore->colptr[n] * (dword + iword) );
-    /* Working storage to support factorization */
-    mem_usage->total_needed = mem_usage->for_lu +
-       (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
-               (panel_size + 1) * n * dword );
-    mem_usage->expansions = --no_expand;
-    return 0;
-} /* cQuerySpace */
- * Allocate storage for the data structures common to all factor routines.
- * For those unpredictable size, make a guess as FILL * nnz(A).
- * Return value:
- *     If lwork = -1, return the estimated amount of space required, plus n;
- *     otherwise, return the amount of space actually allocated when
- *     memory allocation failure occurred.
- */
-cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
-         int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
-         int **iwork, complex **dwork)
-    int      info, iword, dword;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    int      *xsup, *supno;
-    int      *lsub, *xlsub;
-    complex   *lusup;
-    int      *xlusup;
-    complex   *ucol;
-    int      *usub, *xusub;
-    int      nzlmax, nzumax, nzlumax;
-    int      FILL = sp_ienv(6);
-    Glu->n    = n;
-    no_expand = 0;
-    iword     = sizeof(int);
-    dword     = sizeof(complex);
-    if ( !expanders )  
-        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
-    if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
-    if ( fact != SamePattern_SameRowPerm ) {
-       /* Guess for L\U factors */
-       nzumax = nzlumax = FILL * annz;
-       nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else {
-           cSetupSpace(work, lwork, &Glu->MemModel);
-       }
-#if ( PRNTlevel >= 1 )
-       printf("cLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
-              FILL, nzlmax, nzumax);
-       fflush(stdout);
-       /* Integer pointers for L\U factors */
-       if ( Glu->MemModel == SYSTEM ) {
-           xsup   = intMalloc(n+1);
-           supno  = intMalloc(n+1);
-           xlsub  = intMalloc(n+1);
-           xlusup = intMalloc(n+1);
-           xusub  = intMalloc(n+1);
-       } else {
-           xsup   = (int *)cuser_malloc((n+1) * iword, HEAD);
-           supno  = (int *)cuser_malloc((n+1) * iword, HEAD);
-           xlsub  = (int *)cuser_malloc((n+1) * iword, HEAD);
-           xlusup = (int *)cuser_malloc((n+1) * iword, HEAD);
-           xusub  = (int *)cuser_malloc((n+1) * iword, HEAD);
-       }
-       lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
-       ucol  = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
-       lsub  = (int *)    cexpand( &nzlmax, LSUB, 0, 0, Glu );
-       usub  = (int *)    cexpand( &nzumax, USUB, 0, 1, Glu );
-       while ( !lusup || !ucol || !lsub || !usub ) {
-           if ( Glu->MemModel == SYSTEM ) {
-               SUPERLU_FREE(lusup); 
-               SUPERLU_FREE(ucol); 
-               SUPERLU_FREE(lsub); 
-               SUPERLU_FREE(usub);
-           } else {
-               cuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
-           }
-           nzlumax /= 2;
-           nzumax /= 2;
-           nzlmax /= 2;
-           if ( nzlumax < annz ) {
-               printf("Not enough memory to perform factorization.\n");
-               return (cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-           }
-#if ( PRNTlevel >= 1)
-           printf("cLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
-                  nzlmax, nzumax);
-           fflush(stdout);
-           lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
-           ucol  = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
-           lsub  = (int *)    cexpand( &nzlmax, LSUB, 0, 0, Glu );
-           usub  = (int *)    cexpand( &nzumax, USUB, 0, 1, Glu );
-       }
-    } else {
-       /* fact == SamePattern_SameRowPerm */
-       Lstore   = L->Store;
-       Ustore   = U->Store;
-       xsup     = Lstore->sup_to_col;
-       supno    = Lstore->col_to_sup;
-       xlsub    = Lstore->rowind_colptr;
-       xlusup   = Lstore->nzval_colptr;
-       xusub    = Ustore->colptr;
-       nzlmax   = Glu->nzlmax;    /* max from previous factorization */
-       nzumax   = Glu->nzumax;
-       nzlumax  = Glu->nzlumax;
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else if ( lwork == 0 ) {
-           Glu->MemModel = SYSTEM;
-       } else {
-           Glu->MemModel = USER;
-           stack.top2 = (lwork/4)*4; /* must be word-addressable */
-           stack.size = stack.top2;
-       }
-       lsub  = expanders[LSUB].mem  = Lstore->rowind;
-       lusup = expanders[LUSUP].mem = Lstore->nzval;
-       usub  = expanders[USUB].mem  = Ustore->rowind;
-       ucol  = expanders[UCOL].mem  = Ustore->nzval;;
-       expanders[LSUB].size         = nzlmax;
-       expanders[LUSUP].size        = nzlumax;
-       expanders[USUB].size         = nzumax;
-       expanders[UCOL].size         = nzumax;  
-    }
-    Glu->xsup    = xsup;
-    Glu->supno   = supno;
-    Glu->lsub    = lsub;
-    Glu->xlsub   = xlsub;
-    Glu->lusup   = lusup;
-    Glu->xlusup  = xlusup;
-    Glu->ucol    = ucol;
-    Glu->usub    = usub;
-    Glu->xusub   = xusub;
-    Glu->nzlmax  = nzlmax;
-    Glu->nzumax  = nzumax;
-    Glu->nzlumax = nzlumax;
-    info = cLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
-    if ( info )
-       return ( info + cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-    ++no_expand;
-    return 0;
-} /* cLUMemInit */
-/* Allocate known working storage. Returns 0 if success, otherwise
-   returns the number of bytes allocated so far when failure occurred. */
-cLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
-            complex **dworkptr, LU_space_t MemModel)
-    int    isize, dsize, extra;
-    complex *old_ptr;
-    int    maxsuper = sp_ienv(3),
-           rowblk   = sp_ienv(4);
-    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
-    dsize = (m * panel_size +
-            NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(complex);
-    if ( MemModel == SYSTEM ) 
-       *iworkptr = (int *) intCalloc(isize/sizeof(int));
-    else
-       *iworkptr = (int *) cuser_malloc(isize, TAIL);
-    if ( ! *iworkptr ) {
-       fprintf(stderr, "cLUWorkInit: malloc fails for local iworkptr[]\n");
-       return (isize + n);
-    }
-    if ( MemModel == SYSTEM )
-       *dworkptr = (complex *) SUPERLU_MALLOC(dsize);
-    else {
-       *dworkptr = (complex *) cuser_malloc(dsize, TAIL);
-       if ( NotDoubleAlign(*dworkptr) ) {
-           old_ptr = *dworkptr;
-           *dworkptr = (complex*) DoubleAlign(*dworkptr);
-           *dworkptr = (complex*) ((double*)*dworkptr - 1);
-           extra = (char*)old_ptr - (char*)*dworkptr;
-#ifdef DEBUG       
-           printf("cLUWorkInit: not aligned, extra %d\n", extra);
-           stack.top2 -= extra;
-           stack.used += extra;
-       }
-    }
-    if ( ! *dworkptr ) {
-       fprintf(stderr, "malloc fails for local dworkptr[].");
-       return (isize + dsize + n);
-    }
-    return 0;
- * Set up pointers for real working arrays.
- */
-cSetRWork(int m, int panel_size, complex *dworkptr,
-        complex **dense, complex **tempv)
-    complex zero = {0.0, 0.0};
-    int maxsuper = sp_ienv(3),
-        rowblk   = sp_ienv(4);
-    *dense = dworkptr;
-    *tempv = *dense + panel_size*m;
-    cfill (*dense, m * panel_size, zero);
-    cfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
- * Free the working storage used by factor routines.
- */
-void cLUWorkFree(int *iwork, complex *dwork, GlobalLU_t *Glu)
-    if ( Glu->MemModel == SYSTEM ) {
-       SUPERLU_FREE (iwork);
-       SUPERLU_FREE (dwork);
-    } else {
-       stack.used -= (stack.size - stack.top2);
-       stack.top2 = stack.size;
-/*     cStackCompress(Glu);  */
-    }
-    SUPERLU_FREE (expanders);  
-    expanders = 0;
-/* Expand the data structures for L and U during the factorization.
- * Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-cLUMemXpand(int jcol,
-          int next,          /* number of elements currently in the factors */
-          MemType mem_type,  /* which type of memory to expand  */
-          int *maxlen,       /* modified - maximum length of a data structure 
-          GlobalLU_t *Glu    /* modified - global LU data structures */
-          )
-    void   *new_mem;
-#ifdef DEBUG    
-    printf("cLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
-          jcol, next, *maxlen, mem_type);
-    if (mem_type == USUB) 
-       new_mem = cexpand(maxlen, mem_type, next, 1, Glu);
-    else
-       new_mem = cexpand(maxlen, mem_type, next, 0, Glu);
-    if ( !new_mem ) {
-       int    nzlmax  = Glu->nzlmax;
-       int    nzumax  = Glu->nzumax;
-       int    nzlumax = Glu->nzlumax;
-       fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
-       return (cmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
-    }
-    switch ( mem_type ) {
-      case LUSUP:
-       Glu->lusup   = (complex *) new_mem;
-       Glu->nzlumax = *maxlen;
-       break;
-      case UCOL:
-       Glu->ucol   = (complex *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-      case LSUB:
-       Glu->lsub   = (int *) new_mem;
-       Glu->nzlmax = *maxlen;
-       break;
-      case USUB:
-       Glu->usub   = (int *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-    }
-    return 0;
-copy_mem_complex(int howmany, void *old, void *new)
-    register int i;
-    complex *dold = old;
-    complex *dnew = new;
-    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
- * Expand the existing storage to accommodate more fill-ins.
- */
-*cexpand (
-        int *prev_len,   /* length used from previous call */
-        MemType type,    /* which part of the memory to expand */
-        int len_to_copy, /* size of the memory to be copied to new store */
-        int keep_prev,   /* = 1: use prev_len;
-                            = 0: compute new_len to expand */
-        GlobalLU_t *Glu  /* modified - global LU data structures */
-       )
-    float    EXPAND = 1.5;
-    float    alpha;
-    void     *new_mem, *old_mem;
-    int      new_len, tries, lword, extra, bytes_to_copy;
-    alpha = EXPAND;
-    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
-        new_len = *prev_len;
-    else {
-       new_len = alpha * *prev_len;
-    }
-    if ( type == LSUB || type == USUB ) lword = sizeof(int);
-    else lword = sizeof(complex);
-    if ( Glu->MemModel == SYSTEM ) {
-       new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-       if ( no_expand != 0 ) {
-           tries = 0;
-           if ( keep_prev ) {
-               if ( !new_mem ) return (NULL);
-           } else {
-               while ( !new_mem ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-               }
-           }
-           if ( type == LSUB || type == USUB ) {
-               copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
-           } else {
-               copy_mem_complex(len_to_copy, expanders[type].mem, new_mem);
-           }
-           SUPERLU_FREE (expanders[type].mem);
-       }
-       expanders[type].mem = (void *) new_mem;
-    } else { /* MemModel == USER */
-       if ( no_expand == 0 ) {
-           new_mem = cuser_malloc(new_len * lword, HEAD);
-           if ( NotDoubleAlign(new_mem) &&
-               (type == LUSUP || type == UCOL) ) {
-               old_mem = new_mem;
-               new_mem = (void *)DoubleAlign(new_mem);
-               extra = (char*)new_mem - (char*)old_mem;
-#ifdef DEBUG           
-               printf("expand(): not aligned, extra %d\n", extra);
-               stack.top1 += extra;
-               stack.used += extra;
-           }
-           expanders[type].mem = (void *) new_mem;
-       }
-       else {
-           tries = 0;
-           extra = (new_len - *prev_len) * lword;
-           if ( keep_prev ) {
-               if ( StackFull(extra) ) return (NULL);
-           } else {
-               while ( StackFull(extra) ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   extra = (new_len - *prev_len) * lword;          
-               }
-           }
-           if ( type != USUB ) {
-               new_mem = (void*)((char*)expanders[type + 1].mem + extra);
-               bytes_to_copy = (char*)stack.array + stack.top1
-                   - (char*)expanders[type + 1].mem;
-               user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
-               if ( type < USUB ) {
-                   Glu->usub = expanders[USUB].mem =
-                       (void*)((char*)expanders[USUB].mem + extra);
-               }
-               if ( type < LSUB ) {
-                   Glu->lsub = expanders[LSUB].mem =
-                       (void*)((char*)expanders[LSUB].mem + extra);
-               }
-               if ( type < UCOL ) {
-                   Glu->ucol = expanders[UCOL].mem =
-                       (void*)((char*)expanders[UCOL].mem + extra);
-               }
-               stack.top1 += extra;
-               stack.used += extra;
-               if ( type == UCOL ) {
-                   stack.top1 += extra;   /* Add same amount for USUB */
-                   stack.used += extra;
-               }
-           } /* if ... */
-       } /* else ... */
-    }
-    expanders[type].size = new_len;
-    *prev_len = new_len;
-    if ( no_expand ) ++no_expand;
-    return (void *) expanders[type].mem;
-} /* cexpand */
- * Compress the work[] array to remove fragmentation.
- */
-cStackCompress(GlobalLU_t *Glu)
-    register int iword, dword, ndim;
-    char    *last, *fragment;
-    int      *ifrom, *ito;
-    complex   *dfrom, *dto;
-    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
-    complex   *ucol, *lusup;
-    iword = sizeof(int);
-    dword = sizeof(complex);
-    ndim = Glu->n;
-    xlsub  = Glu->xlsub;
-    lsub   = Glu->lsub;
-    xusub  = Glu->xusub;
-    usub   = Glu->usub;
-    xlusup = Glu->xlusup;
-    ucol   = Glu->ucol;
-    lusup  = Glu->lusup;
-    dfrom = ucol;
-    dto = (complex *)((char*)lusup + xlusup[ndim] * dword);
-    copy_mem_complex(xusub[ndim], dfrom, dto);
-    ucol = dto;
-    ifrom = lsub;
-    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
-    copy_mem_int(xlsub[ndim], ifrom, ito);
-    lsub = ito;
-    ifrom = usub;
-    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
-    copy_mem_int(xusub[ndim], ifrom, ito);
-    usub = ito;
-    last = (char*)usub + xusub[ndim] * iword;
-    fragment = (char*) (((char*)stack.array + stack.top1) - last);
-    stack.used -= (long int) fragment;
-    stack.top1 -= (long int) fragment;
-    Glu->ucol = ucol;
-    Glu->lsub = lsub;
-    Glu->usub = usub;
-#ifdef DEBUG
-    printf("cStackCompress: fragment %d\n", fragment);
-    /* for (last = 0; last < ndim; ++last)
-       print_lu_col("After compress:", last, 0);*/
- * Allocate storage for original matrix A
- */
-callocateA(int n, int nnz, complex **a, int **asub, int **xa)
-    *a    = (complex *) complexMalloc(nnz);
-    *asub = (int *) intMalloc(nnz);
-    *xa   = (int *) intMalloc(n+1);
-complex *complexMalloc(int n)
-    complex *buf;
-    buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex)); 
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in complexMalloc()\n");
-    }
-    return (buf);
-complex *complexCalloc(int n)
-    complex *buf;
-    register int i;
-    complex zero = {0.0, 0.0};
-    buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in complexCalloc()\n");
-    }
-    for (i = 0; i < n; ++i) buf[i] = zero;
-    return (buf);
-int cmemory_usage(const int nzlmax, const int nzumax, 
-                 const int nzlumax, const int n)
-    register int iword, dword;
-    iword   = sizeof(int);
-    dword   = sizeof(complex);
-    return (10 * n * iword +
-           nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
diff --git a/superlu/cmyblas2.c b/superlu/cmyblas2.c
deleted file mode 100644
index 7717baef..00000000
--- a/superlu/cmyblas2.c
+++ /dev/null
@@ -1,204 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:          cmyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-#include "slu_scomplex.h"
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void clsolve ( int ldm, int ncol, complex *M, complex *rhs )
-    int k;
-    complex x0, x1, x2, x3, temp;
-    complex *M0;
-    complex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-    M0 = &M[0];
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-       Mki0 = M0 + 1;
-       Mki1 = Mki0 + ldm + 1;
-       Mki2 = Mki1 + ldm + 1;
-       Mki3 = Mki2 + ldm + 1;
-       x0 = rhs[firstcol];
-       cc_mult(&temp, &x0, Mki0); Mki0++;
-       c_sub(&x1, &rhs[firstcol+1], &temp);
-       cc_mult(&temp, &x0, Mki0); Mki0++;
-       c_sub(&x2, &rhs[firstcol+2], &temp);
-       cc_mult(&temp, &x1, Mki1); Mki1++;
-       c_sub(&x2, &x2, &temp);
-       cc_mult(&temp, &x0, Mki0); Mki0++;
-       c_sub(&x3, &rhs[firstcol+3], &temp);
-       cc_mult(&temp, &x1, Mki1); Mki1++;
-       c_sub(&x3, &x3, &temp);
-       cc_mult(&temp, &x2, Mki2); Mki2++;
-       c_sub(&x3, &x3, &temp);
-       rhs[++firstcol] = x1;
-       rhs[++firstcol] = x2;
-       rhs[++firstcol] = x3;
-       ++firstcol;
-       for (k = firstcol; k < ncol; k++) {
-           cc_mult(&temp, &x0, Mki0); Mki0++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-           cc_mult(&temp, &x1, Mki1); Mki1++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-           cc_mult(&temp, &x2, Mki2); Mki2++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-           cc_mult(&temp, &x3, Mki3); Mki3++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-       }
-        M0 += 4 * ldm + 4;
-    }
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-        Mki0 = M0 + 1;
-        Mki1 = Mki0 + ldm + 1;
-        x0 = rhs[firstcol];
-       cc_mult(&temp, &x0, Mki0); Mki0++;
-       c_sub(&x1, &rhs[firstcol+1], &temp);
-       rhs[++firstcol] = x1;
-       ++firstcol;
-       for (k = firstcol; k < ncol; k++) {
-           cc_mult(&temp, &x0, Mki0); Mki0++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-           cc_mult(&temp, &x1, Mki1); Mki1++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-       } 
-    }
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-cusolve ( ldm, ncol, M, rhs )
-int ldm;       /* in */
-int ncol;      /* in */
-complex *M;    /* in */
-complex *rhs;  /* modified */
-    complex xj, temp;
-    int jcol, j, irow;
-    jcol = ncol - 1;
-    for (j = 0; j < ncol; j++) {
-       c_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
-       rhs[jcol] = xj;
-       for (irow = 0; irow < jcol; irow++) {
-           cc_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
-           c_sub(&rhs[irow], &rhs[irow], &temp);
-       }
-       jcol--;
-    }
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void cmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm;       /* in -- leading dimension of M */
-int nrow;      /* in */ 
-int ncol;      /* in */
-complex *M;    /* in */
-complex *vec;  /* in */
-complex *Mxvec;        /* in/out */
-    complex vi0, vi1, vi2, vi3;
-    complex *M0, temp;
-    complex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-    int k;
-    M0 = &M[0];
-    while ( firstcol < ncol - 3 ) {    /* Do 4 columns */
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-       Mki2 = Mki1 + ldm;
-       Mki3 = Mki2 + ldm;
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) {
-           cc_mult(&temp, &vi0, Mki0); Mki0++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-           cc_mult(&temp, &vi1, Mki1); Mki1++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-           cc_mult(&temp, &vi2, Mki2); Mki2++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-           cc_mult(&temp, &vi3, Mki3); Mki3++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-       }
-       M0 += 4 * ldm;
-    }
-    while ( firstcol < ncol ) {                /* Do 1 column */
-       Mki0 = M0;
-       vi0 = vec[firstcol++];
-       for (k = 0; k < nrow; k++) {
-           cc_mult(&temp, &vi0, Mki0); Mki0++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-       }
-       M0 += ldm;
-    }
diff --git a/superlu/colamd.c b/superlu/colamd.c
deleted file mode 100644
index dc531f04..00000000
--- a/superlu/colamd.c
+++ /dev/null
@@ -1,3412 +0,0 @@
-/* ========================================================================== 
-/* === colamd/symamd - a sparse matrix column ordering algorithm ============ 
-/* ========================================================================== 
-    colamd:  an approximate minimum degree column ordering algorithm,
-       for LU factorization of symmetric or unsymmetric matrices,
-       QR factorization, least squares, interior point methods for
-       linear programming problems, and other related problems.
-    symamd:  an approximate minimum degree ordering algorithm for Cholesky
-       factorization of symmetric matrices.
-    Purpose:
-       Colamd computes a permutation Q such that the Cholesky factorization of
-       (AQ)'(AQ) has less fill-in and requires fewer floating point operations
-       than A'A.  This also provides a good ordering for sparse partial
-       pivoting methods, P(AQ) = LU, where Q is computed prior to numerical
-       factorization, and P is computed during numerical factorization via
-       conventional partial pivoting with row interchanges.  Colamd is the
-       column ordering method used in SuperLU, part of the ScaLAPACK library.
-       It is also available as built-in function in MATLAB Version 6,
-       available from MathWorks, Inc. (  This
-       routine can be used in place of colmmd in MATLAB.
-       Symamd computes a permutation P of a symmetric matrix A such that the
-       Cholesky factorization of PAP' has less fill-in and requires fewer
-       floating point operations than A.  Symamd constructs a matrix M such
-       that M'M has the same nonzero pattern of A, and then orders the columns
-       of M using colmmd.  The column ordering of M is then returned as the
-       row and column ordering P of A. 
-    Authors:
-       The authors of the code itself are Stefan I. Larimore and Timothy A.
-       Davis (, University of Florida.  The algorithm was
-       developed in collaboration with John Gilbert, Xerox PARC, and Esmond
-       Ng, Oak Ridge National Laboratory.
-    Date:
-       September 8, 2003.  Version 2.3.
-    Acknowledgements:
-       This work was supported by the National Science Foundation, under
-       grants DMS-9504974 and DMS-9803599.
-    Copyright and License:
-       Copyright (c) 1998-2003 by the University of Florida.
-       All Rights Reserved.
-       Permission is hereby granted to use, copy, modify, and/or distribute
-       this program, provided that the Copyright, this License, and the
-       Availability of the original version is retained on all copies and made
-       accessible to the end-user of any code or package that includes COLAMD
-       or any modified version of COLAMD. 
-    Availability:
-       The colamd/symamd library is available at
-       This is the
-       file.  It requires the colamd.h file.  It is required by the colamdmex.c
-       and symamdmex.c files, for the MATLAB interface to colamd and symamd.
-    See the ChangeLog file for changes since Version 1.0.
-/* ========================================================================== 
-/* === Description of user-callable routines ================================ 
-/* ========================================================================== 
-    colamd_recommended:
-       C syntax:
-           #include "colamd.h"
-           int colamd_recommended (int nnz, int n_row, int n_col) ;
-           or as a C macro
-           #include "colamd.h"
-           Alen = COLAMD_RECOMMENDED (int nnz, int n_row, int n_col) ;
-       Purpose:
-           Returns recommended value of Alen for use by colamd.  Returns -1
-           if any input argument is negative.  The use of this routine
-           or macro is optional.  Note that the macro uses its arguments
-           more than once, so be careful for side effects, if you pass
-           expressions as arguments to COLAMD_RECOMMENDED.  Not needed for
-           symamd, which dynamically allocates its own memory.
-       Arguments (all input arguments):
-           int nnz ;           Number of nonzeros in the matrix A.  This must
-                               be the same value as p [n_col] in the call to
-                               colamd - otherwise you will get a wrong value
-                               of the recommended memory to use.
-           int n_row ;         Number of rows in the matrix A.
-           int n_col ;         Number of columns in the matrix A.
-    colamd_set_defaults:
-       C syntax:
-           #include "colamd.h"
-           colamd_set_defaults (double knobs [COLAMD_KNOBS]) ;
-       Purpose:
-           Sets the default parameters.  The use of this routine is optional.
-       Arguments:
-           double knobs [COLAMD_KNOBS] ;       Output only.
-               Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col)
-               entries are removed prior to ordering.  Columns with more than
-               (knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to
-               ordering, and placed last in the output column ordering. 
-               Symamd: uses only knobs [COLAMD_DENSE_ROW], which is knobs [0].
-               Rows and columns with more than (knobs [COLAMD_DENSE_ROW] * n)
-               entries are removed prior to ordering, and placed last in the
-               output ordering.
-               COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1,
-               respectively, in colamd.h.  Default values of these two knobs
-               are both 0.5.  Currently, only knobs [0] and knobs [1] are
-               used, but future versions may use more knobs.  If so, they will
-               be properly set to their defaults by the future version of
-               colamd_set_defaults, so that the code that calls colamd will
-               not need to change, assuming that you either use
-               colamd_set_defaults, or pass a (double *) NULL pointer as the
-               knobs array to colamd or symamd.
-    colamd:
-       C syntax:
-           #include "colamd.h"
-           int colamd (int n_row, int n_col, int Alen, int *A, int *p,
-               double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS]) ;
-       Purpose:
-           Computes a column ordering (Q) of A such that P(AQ)=LU or
-           (AQ)'AQ=LL' have less fill-in and require fewer floating point
-           operations than factorizing the unpermuted matrix A or A'A,
-           respectively.
-       Returns:
-           TRUE (1) if successful, FALSE (0) otherwise.
-       Arguments:
-           int n_row ;         Input argument.
-               Number of rows in the matrix A.
-               Restriction:  n_row >= 0.
-               Colamd returns FALSE if n_row is negative.
-           int n_col ;         Input argument.
-               Number of columns in the matrix A.
-               Restriction:  n_col >= 0.
-               Colamd returns FALSE if n_col is negative.
-           int Alen ;          Input argument.
-               Restriction (see note):
-               Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col
-               Colamd returns FALSE if these conditions are not met.
-               Note:  this restriction makes an modest assumption regarding
-               the size of the two typedef's structures in colamd.h.
-               We do, however, guarantee that
-                       Alen >= colamd_recommended (nnz, n_row, n_col)
-               or equivalently as a C preprocessor macro: 
-                       Alen >= COLAMD_RECOMMENDED (nnz, n_row, n_col)
-               will be sufficient.
-           int A [Alen] ;      Input argument, undefined on output.
-               A is an integer array of size Alen.  Alen must be at least as
-               large as the bare minimum value given above, but this is very
-               low, and can result in excessive run time.  For best
-               performance, we recommend that Alen be greater than or equal to
-               colamd_recommended (nnz, n_row, n_col), which adds
-               nnz/5 to the bare minimum value given above.
-               On input, the row indices of the entries in column c of the
-               matrix are held in A [(p [c]) ... (p [c+1]-1)].  The row indices
-               in a given column c need not be in ascending order, and
-               duplicate row indices may be be present.  However, colamd will
-               work a little faster if both of these conditions are met
-               (Colamd puts the matrix into this format, if it finds that the
-               the conditions are not met).
-               The matrix is 0-based.  That is, rows are in the range 0 to
-               n_row-1, and columns are in the range 0 to n_col-1.  Colamd
-               returns FALSE if any row index is out of range.
-               The contents of A are modified during ordering, and are
-               undefined on output.
-           int p [n_col+1] ;   Both input and output argument.
-               p is an integer array of size n_col+1.  On input, it holds the
-               "pointers" for the column form of the matrix A.  Column c of
-               the matrix A is held in A [(p [c]) ... (p [c+1]-1)].  The first
-               entry, p [0], must be zero, and p [c] <= p [c+1] must hold
-               for all c in the range 0 to n_col-1.  The value p [n_col] is
-               thus the total number of entries in the pattern of the matrix A.
-               Colamd returns FALSE if these conditions are not met.
-               On output, if colamd returns TRUE, the array p holds the column
-               permutation (Q, for P(AQ)=LU or (AQ)'(AQ)=LL'), where p [0] is
-               the first column index in the new ordering, and p [n_col-1] is
-               the last.  That is, p [k] = j means that column j of A is the
-               kth pivot column, in AQ, where k is in the range 0 to n_col-1
-               (p [0] = j means that column j of A is the first column in AQ).
-               If colamd returns FALSE, then no permutation is returned, and
-               p is undefined on output.
-           double knobs [COLAMD_KNOBS] ;       Input argument.
-               See colamd_set_defaults for a description.
-           int stats [COLAMD_STATS] ;          Output argument.
-               Statistics on the ordering, and error status.
-               See colamd.h for related definitions.
-               Colamd returns FALSE if stats is not present.
-               stats [0]:  number of dense or empty rows ignored.
-               stats [1]:  number of dense or empty columns ignored (and
-                               ordered last in the output permutation p)
-                               Note that a row can become "empty" if it
-                               contains only "dense" and/or "empty" columns,
-                               and similarly a column can become "empty" if it
-                               only contains "dense" and/or "empty" rows.
-               stats [2]:  number of garbage collections performed.
-                               This can be excessively high if Alen is close
-                               to the minimum required value.
-               stats [3]:  status code.  < 0 is an error code.
-                           > 1 is a warning or notice.
-                       0       OK.  Each column of the input matrix contained
-                               row indices in increasing order, with no
-                               duplicates.
-                       1       OK, but columns of input matrix were jumbled
-                               (unsorted columns or duplicate entries).  Colamd
-                               had to do some extra work to sort the matrix
-                               first and remove duplicate entries, but it
-                               still was able to return a valid permutation
-                               (return value of colamd was TRUE).
-                                       stats [4]: highest numbered column that
-                                               is unsorted or has duplicate
-                                               entries.
-                                       stats [5]: last seen duplicate or
-                                               unsorted row index.
-                                       stats [6]: number of duplicate or
-                                               unsorted row indices.
-                       -1      A is a null pointer
-                       -2      p is a null pointer
-                       -3      n_row is negative
-                                       stats [4]: n_row
-                       -4      n_col is negative
-                                       stats [4]: n_col
-                       -5      number of nonzeros in matrix is negative
-                                       stats [4]: number of nonzeros, p [n_col]
-                       -6      p [0] is nonzero
-                                       stats [4]: p [0]
-                       -7      A is too small
-                                       stats [4]: required size
-                                       stats [5]: actual size (Alen)
-                       -8      a column has a negative number of entries
-                                       stats [4]: column with < 0 entries
-                                       stats [5]: number of entries in col
-                       -9      a row index is out of bounds
-                                       stats [4]: column with bad row index
-                                       stats [5]: bad row index
-                                       stats [6]: n_row, # of rows of matrx
-                       -10     (unused; see symamd.c)
-                       -999    (unused; see symamd.c)
-               Future versions may return more statistics in the stats array.
-       Example:
-           See
-           for a complete example.
-           To order the columns of a 5-by-4 matrix with 11 nonzero entries in
-           the following nonzero pattern
-               x 0 x 0
-               x 0 x x
-               0 x x 0
-               0 0 x x
-               x x 0 0
-           with default knobs and no output statistics, do the following:
-               #include "colamd.h"
-               #define ALEN COLAMD_RECOMMENDED (11, 5, 4)
-               int A [ALEN] = {1, 2, 5, 3, 5, 1, 2, 3, 4, 2, 4} ;
-               int p [ ] = {0, 3, 5, 9, 11} ;
-               int stats [COLAMD_STATS] ;
-               colamd (5, 4, ALEN, A, p, (double *) NULL, stats) ;
-           The permutation is returned in the array p, and A is destroyed.
-    symamd:
-       C syntax:
-           #include "colamd.h"
-           int symamd (int n, int *A, int *p, int *perm,
-               double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS],
-               void (*allocate) (size_t, size_t), void (*release) (void *)) ;
-       Purpose:
-           The symamd routine computes an ordering P of a symmetric sparse
-           matrix A such that the Cholesky factorization PAP' = LL' remains
-           sparse.  It is based on a column ordering of a matrix M constructed
-           so that the nonzero pattern of M'M is the same as A.  The matrix A
-           is assumed to be symmetric; only the strictly lower triangular part
-           is accessed.  You must pass your selected memory allocator (usually
-           calloc/free or mxCalloc/mxFree) to symamd, for it to allocate
-           memory for the temporary matrix M.
-       Returns:
-           TRUE (1) if successful, FALSE (0) otherwise.
-       Arguments:
-           int n ;             Input argument.
-               Number of rows and columns in the symmetrix matrix A.
-               Restriction:  n >= 0.
-               Symamd returns FALSE if n is negative.
-           int A [nnz] ;       Input argument.
-               A is an integer array of size nnz, where nnz = p [n].
-               The row indices of the entries in column c of the matrix are
-               held in A [(p [c]) ... (p [c+1]-1)].  The row indices in a
-               given column c need not be in ascending order, and duplicate
-               row indices may be present.  However, symamd will run faster
-               if the columns are in sorted order with no duplicate entries. 
-               The matrix is 0-based.  That is, rows are in the range 0 to
-               n-1, and columns are in the range 0 to n-1.  Symamd
-               returns FALSE if any row index is out of range.
-               The contents of A are not modified.
-           int p [n+1] ;       Input argument.
-               p is an integer array of size n+1.  On input, it holds the
-               "pointers" for the column form of the matrix A.  Column c of
-               the matrix A is held in A [(p [c]) ... (p [c+1]-1)].  The first
-               entry, p [0], must be zero, and p [c] <= p [c+1] must hold
-               for all c in the range 0 to n-1.  The value p [n] is
-               thus the total number of entries in the pattern of the matrix A.
-               Symamd returns FALSE if these conditions are not met.
-               The contents of p are not modified.
-           int perm [n+1] ;    Output argument.
-               On output, if symamd returns TRUE, the array perm holds the
-               permutation P, where perm [0] is the first index in the new
-               ordering, and perm [n-1] is the last.  That is, perm [k] = j
-               means that row and column j of A is the kth column in PAP',
-               where k is in the range 0 to n-1 (perm [0] = j means
-               that row and column j of A are the first row and column in
-               PAP').  The array is used as a workspace during the ordering,
-               which is why it must be of length n+1, not just n.
-           double knobs [COLAMD_KNOBS] ;       Input argument.
-               See colamd_set_defaults for a description.
-           int stats [COLAMD_STATS] ;          Output argument.
-               Statistics on the ordering, and error status.
-               See colamd.h for related definitions.
-               Symamd returns FALSE if stats is not present.
-               stats [0]:  number of dense or empty row and columns ignored
-                               (and ordered last in the output permutation 
-                               perm).  Note that a row/column can become
-                               "empty" if it contains only "dense" and/or
-                               "empty" columns/rows.
-               stats [1]:  (same as stats [0])
-               stats [2]:  number of garbage collections performed.
-               stats [3]:  status code.  < 0 is an error code.
-                           > 1 is a warning or notice.
-                       0       OK.  Each column of the input matrix contained
-                               row indices in increasing order, with no
-                               duplicates.
-                       1       OK, but columns of input matrix were jumbled
-                               (unsorted columns or duplicate entries).  Symamd
-                               had to do some extra work to sort the matrix
-                               first and remove duplicate entries, but it
-                               still was able to return a valid permutation
-                               (return value of symamd was TRUE).
-                                       stats [4]: highest numbered column that
-                                               is unsorted or has duplicate
-                                               entries.
-                                       stats [5]: last seen duplicate or
-                                               unsorted row index.
-                                       stats [6]: number of duplicate or
-                                               unsorted row indices.
-                       -1      A is a null pointer
-                       -2      p is a null pointer
-                       -3      (unused, see colamd.c)
-                       -4      n is negative
-                                       stats [4]: n
-                       -5      number of nonzeros in matrix is negative
-                                       stats [4]: # of nonzeros (p [n]).
-                       -6      p [0] is nonzero
-                                       stats [4]: p [0]
-                       -7      (unused)
-                       -8      a column has a negative number of entries
-                                       stats [4]: column with < 0 entries
-                                       stats [5]: number of entries in col
-                       -9      a row index is out of bounds
-                                       stats [4]: column with bad row index
-                                       stats [5]: bad row index
-                                       stats [6]: n_row, # of rows of matrx
-                       -10     out of memory (unable to allocate temporary
-                               workspace for M or count arrays using the
-                               "allocate" routine passed into symamd).
-                       -999    internal error.  colamd failed to order the
-                               matrix M, when it should have succeeded.  This
-                               indicates a bug.  If this (and *only* this)
-                               error code occurs, please contact the authors.
-                               Don't contact the authors if you get any other
-                               error code.
-               Future versions may return more statistics in the stats array.
-           void * (*allocate) (size_t, size_t)
-               A pointer to a function providing memory allocation.  The
-               allocated memory must be returned initialized to zero.  For a
-               C application, this argument should normally be a pointer to
-               calloc.  For a MATLAB mexFunction, the routine mxCalloc is
-               passed instead.
-           void (*release) (void *)
-               A pointer to a function that frees memory allocated by the
-               memory allocation routine above.  For a C application, this
-               argument should normally be a pointer to free.  For a MATLAB
-               mexFunction, the routine mxFree is passed instead.
-    colamd_report:
-       C syntax:
-           #include "colamd.h"
-           colamd_report (int stats [COLAMD_STATS]) ;
-       Purpose:
-           Prints the error status and statistics recorded in the stats
-           array on the standard error output (for a standard C routine)
-           or on the MATLAB output (for a mexFunction).
-       Arguments:
-           int stats [COLAMD_STATS] ;  Input only.  Statistics from colamd.
-    symamd_report:
-       C syntax:
-           #include "colamd.h"
-           symamd_report (int stats [COLAMD_STATS]) ;
-       Purpose:
-           Prints the error status and statistics recorded in the stats
-           array on the standard error output (for a standard C routine)
-           or on the MATLAB output (for a mexFunction).
-       Arguments:
-           int stats [COLAMD_STATS] ;  Input only.  Statistics from symamd.
-/* ========================================================================== 
-/* === Scaffolding code definitions  ======================================== 
-/* ========================================================================== 
-/* Ensure that debugging is turned off: */
-#ifndef NDEBUG
-#define NDEBUG
-#endif /* NDEBUG */
-   Our "scaffolding code" philosophy:  In our opinion, well-written library
-   code should keep its "debugging" code, and just normally have it turned off
-   by the compiler so as not to interfere with performance.  This serves
-   several purposes:
-   (1) assertions act as comments to the reader, telling you what the code
-       expects at that point.  All assertions will always be true (unless
-       there really is a bug, of course).
-   (2) leaving in the scaffolding code assists anyone who would like to modify
-       the code, or understand the algorithm (by reading the debugging output,
-       one can get a glimpse into what the code is doing).
-   (3) (gasp!) for actually finding bugs.  This code has been heavily tested
-       and "should" be fully functional and bug-free ... but you never know...
-    To enable debugging, comment out the "#define NDEBUG" above.  For a MATLAB
-    mexFunction, you will also need to modify mexopts.sh to remove the -DNDEBUG
-    definition.  The code will become outrageously slow when debugging is
-    enabled.  To control the level of debugging output, set an environment
-    variable D to 0 (little), 1 (some), 2, 3, or 4 (lots).  When debugging,
-    you should see the following message on the standard output:
-       colamd: debug version, D = 1 (THIS WILL BE SLOW!)
-    or a similar message for symamd.  If you don't, then debugging has not
-    been enabled.
-/* ========================================================================== 
-/* === Include files ======================================================== 
-/* ========================================================================== 
-#include "colamd.h"
-#include <limits.h>
-#include "mex.h"
-#include "matrix.h"
-#include <stdio.h>
-#include <assert.h>
-#endif /* MATLAB_MEX_FILE */
-/* ========================================================================== 
-/* === Definitions ========================================================== 
-/* ========================================================================== 
-/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */
-#define PUBLIC
-#define PRIVATE static
-#define MAX(a,b) (((a) > (b)) ? (a) : (b))
-#define MIN(a,b) (((a) < (b)) ? (a) : (b))
-#define ONES_COMPLEMENT(r) (-(r)-1)
-/* -------------------------------------------------------------------------- 
-/* Change for version 2.1:  define TRUE and FALSE only if not yet defined */  
-/* -------------------------------------------------------------------------- 
-#ifndef TRUE
-#define TRUE (1)
-#ifndef FALSE
-#define FALSE (0)
-/* -------------------------------------------------------------------------- 
-#define EMPTY  (-1)
-/* Row and column status */
-#define ALIVE  (0)
-#define DEAD   (-1)
-/* Column status */
-#define DEAD_PRINCIPAL         (-1)
-#define DEAD_NON_PRINCIPAL     (-2)
-/* Macros for row and column status update and checking. */
-#define ROW_IS_DEAD(r)                 ROW_IS_MARKED_DEAD (Row[r].shared2.mark)
-#define ROW_IS_MARKED_DEAD(row_mark)   (row_mark < ALIVE)
-#define ROW_IS_ALIVE(r)                        (Row [r].shared2.mark >= ALIVE)
-#define COL_IS_DEAD(c)                 (Col [c].start < ALIVE)
-#define COL_IS_ALIVE(c)                        (Col [c].start >= ALIVE)
-#define COL_IS_DEAD_PRINCIPAL(c)       (Col [c].start == DEAD_PRINCIPAL)
-#define KILL_ROW(r)                    { Row [r].shared2.mark = DEAD ; }
-#define KILL_PRINCIPAL_COL(c)          { Col [c].start = DEAD_PRINCIPAL ; }
-#define KILL_NON_PRINCIPAL_COL(c)      { Col [c].start = DEAD_NON_PRINCIPAL ; }
-/* ========================================================================== 
-/* === Colamd reporting mechanism =========================================== 
-/* ========================================================================== 
-/* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output 
-#define PRINTF mexPrintf
-/* In MATLAB, matrices are 1-based to the user, but 0-based internally */
-#define INDEX(i) ((i)+1)
-/* Use printf in standard C environment, for debugging and statistics output. 
-/* Output is generated only if debugging is enabled at compile time, or if */
-/* the caller explicitly calls colamd_report or symamd_report. */
-#define PRINTF printf
-/* In C, matrices are 0-based and indices are reported as such in *_report */
-#define INDEX(i) (i)
-#endif /* MATLAB_MEX_FILE */
-/* ========================================================================== 
-/* === Prototypes of PRIVATE routines ======================================= 
-/* ========================================================================== 
-PRIVATE int init_rows_cols
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int p [],
-    int stats [COLAMD_STATS]
-) ;
-PRIVATE void init_scoring
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int head [],
-    double knobs [COLAMD_KNOBS],
-    int *p_n_row2,
-    int *p_n_col2,
-    int *p_max_deg
-) ;
-PRIVATE int find_ordering
-    int n_row,
-    int n_col,
-    int Alen,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int head [],
-    int n_col2,
-    int max_deg,
-    int pfree
-) ;
-PRIVATE void order_children
-    int n_col,
-    Colamd_Col Col [],
-    int p []
-) ;
-PRIVATE void detect_super_cols
-#ifndef NDEBUG
-    int n_col,
-    Colamd_Row Row [],
-#endif /* NDEBUG */
-    Colamd_Col Col [],
-    int A [],
-    int head [],
-    int row_start,
-    int row_length
-) ;
-PRIVATE int garbage_collection
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int *pfree
-) ;
-PRIVATE int clear_mark
-    int n_row,
-    Colamd_Row Row []
-) ;
-PRIVATE void print_report
-    char *method,
-    int stats [COLAMD_STATS]
-) ;
-/* ========================================================================== 
-/* === Debugging prototypes and definitions ================================= 
-/* ========================================================================== 
-#ifndef NDEBUG
-/* colamd_debug is the *ONLY* global variable, and is only */
-/* present when debugging */
-PRIVATE int colamd_debug ;     /* debug print level */
-#define DEBUG0(params) { (void) PRINTF params ; }
-#define DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; }
-#define DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; }
-#define DEBUG3(params) { if (colamd_debug >= 3) (void) PRINTF params ; }
-#define DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; }
-#define ASSERT(expression) (mxAssert ((expression), ""))
-#define ASSERT(expression) (assert (expression))
-#endif /* MATLAB_MEX_FILE */
-PRIVATE void colamd_get_debug  /* gets the debug print level from getenv */
-    char *method
-) ;
-PRIVATE void debug_deg_lists
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int head [],
-    int min_score,
-    int should,
-    int max_deg
-) ;
-PRIVATE void debug_mark
-    int n_row,
-    Colamd_Row Row [],
-    int tag_mark,
-    int max_mark
-) ;
-PRIVATE void debug_matrix
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A []
-) ;
-PRIVATE void debug_structures
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int n_col2
-) ;
-#else /* NDEBUG */
-/* === No debugging ========================================================= 
-#define DEBUG0(params) ;
-#define DEBUG1(params) ;
-#define DEBUG2(params) ;
-#define DEBUG3(params) ;
-#define DEBUG4(params) ;
-#define ASSERT(expression) ((void) 0)
-#endif /* NDEBUG */
-/* ========================================================================== 
-/* ========================================================================== 
-/* === USER-CALLABLE ROUTINES: ============================================== 
-/* ========================================================================== 
-/* ========================================================================== 
-/* === colamd_recommended =================================================== 
-/* ========================================================================== 
-    The colamd_recommended routine returns the suggested size for Alen.  This
-    value has been determined to provide good balance between the number of
-    garbage collections and the memory requirements for colamd.  If any
-    argument is negative, a -1 is returned as an error condition.  This
-    function is also available as a macro defined in colamd.h, so that you
-    can use it for a statically-allocated array size.
-PUBLIC int colamd_recommended  /* returns recommended value of Alen. */
-    /* === Parameters ======================================================= 
-    int nnz,                   /* number of nonzeros in A */
-    int n_row,                 /* number of rows in A */
-    int n_col                  /* number of columns in A */
-    return (COLAMD_RECOMMENDED (nnz, n_row, n_col)) ; 
-/* ========================================================================== 
-/* === colamd_set_defaults ================================================== 
-/* ========================================================================== 
-    The colamd_set_defaults routine sets the default values of the user-
-    controllable parameters for colamd:
-       knobs [0]       rows with knobs[0]*n_col entries or more are removed
-                       prior to ordering in colamd.  Rows and columns with
-                       knobs[0]*n_col entries or more are removed prior to
-                       ordering in symamd and placed last in the output
-                       ordering.
-       knobs [1]       columns with knobs[1]*n_row entries or more are removed
-                       prior to ordering in colamd, and placed last in the
-                       column permutation.  Symamd ignores this knob.
-       knobs [2..19]   unused, but future versions might use this
-PUBLIC void colamd_set_defaults
-    /* === Parameters ======================================================= 
-    double knobs [COLAMD_KNOBS]                /* knob array */
-    /* === Local variables ================================================== 
-    int i ;
-    if (!knobs)
-    {
-       return ;                        /* no knobs to initialize */
-    }
-    for (i = 0 ; i < COLAMD_KNOBS ; i++)
-    {
-       knobs [i] = 0 ;
-    }
-    knobs [COLAMD_DENSE_ROW] = 0.5 ;   /* ignore rows over 50% dense */
-    knobs [COLAMD_DENSE_COL] = 0.5 ;   /* ignore columns over 50% dense */
-/* ========================================================================== 
-/* === symamd =============================================================== 
-/* ========================================================================== 
-PUBLIC int symamd                      /* return TRUE if OK, FALSE otherwise */
-    /* === Parameters ======================================================= 
-    int n,                             /* number of rows and columns of A */
-    int A [],                          /* row indices of A */
-    int p [],                          /* column pointers of A */
-    int perm [],                       /* output permutation, size n+1 */
-    double knobs [COLAMD_KNOBS],       /* parameters (uses defaults if NULL) */
-    int stats [COLAMD_STATS],          /* output statistics and error codes */
-    void * (*allocate) (size_t, size_t),
-                                       /* pointer to calloc (ANSI C) or */
-                                       /* mxCalloc (for MATLAB mexFunction) */
-    void (*release) (void *)
-                                       /* pointer to free (ANSI C) or */
-                                       /* mxFree (for MATLAB mexFunction) */
-    /* === Local variables ================================================== 
-    int *count ;               /* length of each column of M, and col pointer*/
-    int *mark ;                        /* mark array for finding duplicate entries */
-    int *M ;                   /* row indices of matrix M */
-    int Mlen ;                 /* length of M */
-    int n_row ;                        /* number of rows in M */
-    int nnz ;                  /* number of entries in A */
-    int i ;                    /* row index of A */
-    int j ;                    /* column index of A */
-    int k ;                    /* row index of M */ 
-    int mnz ;                  /* number of nonzeros in M */
-    int pp ;                   /* index into a column of A */
-    int last_row ;             /* last row seen in the current column */
-    int length ;               /* number of nonzeros in a column */
-    double cknobs [COLAMD_KNOBS] ;             /* knobs for colamd */
-    double default_knobs [COLAMD_KNOBS] ;      /* default knobs for colamd */
-    int cstats [COLAMD_STATS] ;                        /* colamd stats */
-#ifndef NDEBUG
-    colamd_get_debug ("symamd") ;
-#endif /* NDEBUG */
-    /* === Check the input arguments ======================================== 
-    if (!stats)
-    {
-       DEBUG0 (("symamd: stats not present\n")) ;
-       return (FALSE) ;
-    }
-    for (i = 0 ; i < COLAMD_STATS ; i++)
-    {
-       stats [i] = 0 ;
-    }
-    stats [COLAMD_STATUS] = COLAMD_OK ;
-    stats [COLAMD_INFO1] = -1 ;
-    stats [COLAMD_INFO2] = -1 ;
-    if (!A)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
-       DEBUG0 (("symamd: A not present\n")) ;
-       return (FALSE) ;
-    }
-    if (!p)            /* p is not present */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
-       DEBUG0 (("symamd: p not present\n")) ;
-       return (FALSE) ;
-    }
-    if (n < 0)         /* n must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
-       stats [COLAMD_INFO1] = n ;
-       DEBUG0 (("symamd: n negative %d\n", n)) ;
-       return (FALSE) ;
-    }
-    nnz = p [n] ;
-    if (nnz < 0)       /* nnz must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
-       stats [COLAMD_INFO1] = nnz ;
-       DEBUG0 (("symamd: number of entries negative %d\n", nnz)) ;
-       return (FALSE) ;
-    }
-    if (p [0] != 0)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
-       stats [COLAMD_INFO1] = p [0] ;
-       DEBUG0 (("symamd: p[0] not zero %d\n", p [0])) ;
-       return (FALSE) ;
-    }
-    /* === If no knobs, set default knobs =================================== 
-    if (!knobs)
-    {
-       colamd_set_defaults (default_knobs) ;
-       knobs = default_knobs ;
-    }
-    /* === Allocate count and mark ========================================== 
-    count = (int *) ((*allocate) (n+1, sizeof (int))) ;
-    if (!count)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
-       DEBUG0 (("symamd: allocate count (size %d) failed\n", n+1)) ;
-       return (FALSE) ;
-    }
-    mark = (int *) ((*allocate) (n+1, sizeof (int))) ;
-    if (!mark)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
-       (*release) ((void *) count) ;
-       DEBUG0 (("symamd: allocate mark (size %d) failed\n", n+1)) ;
-       return (FALSE) ;
-    }
-    /* === Compute column counts of M, check if A is valid ================== 
-    stats [COLAMD_INFO3] = 0 ;  /* number of duplicate or unsorted row indices */
-    for (i = 0 ; i < n ; i++)
-    {
-       mark [i] = -1 ;
-    }
-    for (j = 0 ; j < n ; j++)
-    {
-       last_row = -1 ;
-       length = p [j+1] - p [j] ;
-       if (length < 0)
-       {
-           /* column pointers must be non-decreasing */
-           stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
-           stats [COLAMD_INFO1] = j ;
-           stats [COLAMD_INFO2] = length ;
-           (*release) ((void *) count) ;
-           (*release) ((void *) mark) ;
-           DEBUG0 (("symamd: col %d negative length %d\n", j, length)) ;
-           return (FALSE) ;
-       }
-       for (pp = p [j] ; pp < p [j+1] ; pp++)
-       {
-           i = A [pp] ;
-           if (i < 0 || i >= n)
-           {
-               /* row index i, in column j, is out of bounds */
-               stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
-               stats [COLAMD_INFO1] = j ;
-               stats [COLAMD_INFO2] = i ;
-               stats [COLAMD_INFO3] = n ;
-               (*release) ((void *) count) ;
-               (*release) ((void *) mark) ;
-               DEBUG0 (("symamd: row %d col %d out of bounds\n", i, j)) ;
-               return (FALSE) ;
-           }
-           if (i <= last_row || mark [i] == j)
-           {
-               /* row index is unsorted or repeated (or both), thus col */
-               /* is jumbled.  This is a notice, not an error condition. */
-               stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
-               stats [COLAMD_INFO1] = j ;
-               stats [COLAMD_INFO2] = i ;
-               (stats [COLAMD_INFO3]) ++ ;
-               DEBUG1 (("symamd: row %d col %d unsorted/duplicate\n", i, j)) ;
-           }
-           if (i > j && mark [i] != j)
-           {
-               /* row k of M will contain column indices i and j */
-               count [i]++ ;
-               count [j]++ ;
-           }
-           /* mark the row as having been seen in this column */
-           mark [i] = j ;
-           last_row = i ;
-       }
-    }
-    if (stats [COLAMD_STATUS] == COLAMD_OK)
-    {
-       /* if there are no duplicate entries, then mark is no longer needed */
-       (*release) ((void *) mark) ;
-    }
-    /* === Compute column pointers of M ===================================== 
-    /* use output permutation, perm, for column pointers of M */
-    perm [0] = 0 ;
-    for (j = 1 ; j <= n ; j++)
-    {
-       perm [j] = perm [j-1] + count [j-1] ;
-    }
-    for (j = 0 ; j < n ; j++)
-    {
-       count [j] = perm [j] ;
-    }
-    /* === Construct M ====================================================== 
-    mnz = perm [n] ;
-    n_row = mnz / 2 ;
-    Mlen = colamd_recommended (mnz, n_row, n) ;
-    M = (int *) ((*allocate) (Mlen, sizeof (int))) ;
-    DEBUG0 (("symamd: M is %d-by-%d with %d entries, Mlen = %d\n",
-       n_row, n, mnz, Mlen)) ;
-    if (!M)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
-       (*release) ((void *) count) ;
-       (*release) ((void *) mark) ;
-       DEBUG0 (("symamd: allocate M (size %d) failed\n", Mlen)) ;
-       return (FALSE) ;
-    }
-    k = 0 ;
-    if (stats [COLAMD_STATUS] == COLAMD_OK)
-    {
-       /* Matrix is OK */
-       for (j = 0 ; j < n ; j++)
-       {
-           ASSERT (p [j+1] - p [j] >= 0) ;
-           for (pp = p [j] ; pp < p [j+1] ; pp++)
-           {
-               i = A [pp] ;
-               ASSERT (i >= 0 && i < n) ;
-               if (i > j)
-               {
-                   /* row k of M contains column indices i and j */
-                   M [count [i]++] = k ;
-                   M [count [j]++] = k ;
-                   k++ ;
-               }
-           }
-       }
-    }
-    else
-    {
-       /* Matrix is jumbled.  Do not add duplicates to M.  Unsorted cols OK. */
-       DEBUG0 (("symamd: Duplicates in A.\n")) ;
-       for (i = 0 ; i < n ; i++)
-       {
-           mark [i] = -1 ;
-       }
-       for (j = 0 ; j < n ; j++)
-       {
-           ASSERT (p [j+1] - p [j] >= 0) ;
-           for (pp = p [j] ; pp < p [j+1] ; pp++)
-           {
-               i = A [pp] ;
-               ASSERT (i >= 0 && i < n) ;
-               if (i > j && mark [i] != j)
-               {
-                   /* row k of M contains column indices i and j */
-                   M [count [i]++] = k ;
-                   M [count [j]++] = k ;
-                   k++ ;
-                   mark [i] = j ;
-               }
-           }
-       }
-       (*release) ((void *) mark) ;
-    }
-    /* count and mark no longer needed */
-    (*release) ((void *) count) ;
-    ASSERT (k == n_row) ;
-    /* === Adjust the knobs for M =========================================== 
-    for (i = 0 ; i < COLAMD_KNOBS ; i++)
-    {
-       cknobs [i] = knobs [i] ;
-    }
-    /* there are no dense rows in M */
-    cknobs [COLAMD_DENSE_ROW] = 1.0 ;
-    if (n_row != 0 && n < n_row)
-    {
-       /* On input, the knob is a fraction of 1..n, the number of rows of A. */
-       /* Convert it to a fraction of 1..n_row, of the number of rows of M. */
-       cknobs [COLAMD_DENSE_COL] = (knobs [COLAMD_DENSE_ROW] * n) / n_row ;
-    }
-    else
-    {
-       /* no dense columns in M */
-       cknobs [COLAMD_DENSE_COL] = 1.0 ;
-    }
-    DEBUG0 (("symamd: dense col knob for M: %g\n", cknobs [COLAMD_DENSE_COL])) 
-    /* === Order the columns of M =========================================== 
-    if (!colamd (n_row, n, Mlen, M, perm, cknobs, cstats))
-    {
-       /* This "cannot" happen, unless there is a bug in the code. */
-       stats [COLAMD_STATUS] = COLAMD_ERROR_internal_error ;
-       (*release) ((void *) M) ;
-       DEBUG0 (("symamd: internal error!\n")) ;
-       return (FALSE) ;
-    }
-    /* Note that the output permutation is now in perm */
-    /* === get the statistics for symamd from colamd ======================== 
-    /* note that a dense column in colamd means a dense row and col in symamd 
-    stats [COLAMD_DENSE_ROW]    = cstats [COLAMD_DENSE_COL] ;
-    stats [COLAMD_DENSE_COL]    = cstats [COLAMD_DENSE_COL] ;
-    /* === Free M =========================================================== 
-    (*release) ((void *) M) ;
-    DEBUG0 (("symamd: done.\n")) ;
-    return (TRUE) ;
-/* ========================================================================== 
-/* === colamd =============================================================== 
-/* ========================================================================== 
-    The colamd routine computes a column ordering Q of a sparse matrix
-    A such that the LU factorization P(AQ) = LU remains sparse, where P is
-    selected via partial pivoting.   The routine can also be viewed as
-    providing a permutation Q such that the Cholesky factorization
-    (AQ)'(AQ) = LL' remains sparse.
-PUBLIC int colamd              /* returns TRUE if successful, FALSE otherwise*/
-    /* === Parameters ======================================================= 
-    int n_row,                 /* number of rows in A */
-    int n_col,                 /* number of columns in A */
-    int Alen,                  /* length of A */
-    int A [],                  /* row indices of A */
-    int p [],                  /* pointers to columns in A */
-    double knobs [COLAMD_KNOBS],/* parameters (uses defaults if NULL) */
-    int stats [COLAMD_STATS]   /* output statistics and error codes */
-    /* === Local variables ================================================== 
-    int i ;                    /* loop index */
-    int nnz ;                  /* nonzeros in A */
-    int Row_size ;             /* size of Row [], in integers */
-    int Col_size ;             /* size of Col [], in integers */
-    int need ;                 /* minimum required length of A */
-    Colamd_Row *Row ;          /* pointer into A of Row [0..n_row] array */
-    Colamd_Col *Col ;          /* pointer into A of Col [0..n_col] array */
-    int n_col2 ;               /* number of non-dense, non-empty columns */
-    int n_row2 ;               /* number of non-dense, non-empty rows */
-    int ngarbage ;             /* number of garbage collections performed */
-    int max_deg ;              /* maximum row degree */
-    double default_knobs [COLAMD_KNOBS] ;      /* default knobs array */
-#ifndef NDEBUG
-    colamd_get_debug ("colamd") ;
-#endif /* NDEBUG */
-    /* === Check the input arguments ======================================== 
-    if (!stats)
-    {
-       DEBUG0 (("colamd: stats not present\n")) ;
-       return (FALSE) ;
-    }
-    for (i = 0 ; i < COLAMD_STATS ; i++)
-    {
-       stats [i] = 0 ;
-    }
-    stats [COLAMD_STATUS] = COLAMD_OK ;
-    stats [COLAMD_INFO1] = -1 ;
-    stats [COLAMD_INFO2] = -1 ;
-    if (!A)            /* A is not present */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
-       DEBUG0 (("colamd: A not present\n")) ;
-       return (FALSE) ;
-    }
-    if (!p)            /* p is not present */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
-       DEBUG0 (("colamd: p not present\n")) ;
-       return (FALSE) ;
-    }
-    if (n_row < 0)     /* n_row must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ;
-       stats [COLAMD_INFO1] = n_row ;
-       DEBUG0 (("colamd: nrow negative %d\n", n_row)) ;
-       return (FALSE) ;
-    }
-    if (n_col < 0)     /* n_col must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
-       stats [COLAMD_INFO1] = n_col ;
-       DEBUG0 (("colamd: ncol negative %d\n", n_col)) ;
-       return (FALSE) ;
-    }
-    nnz = p [n_col] ;
-    if (nnz < 0)       /* nnz must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
-       stats [COLAMD_INFO1] = nnz ;
-       DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ;
-       return (FALSE) ;
-    }
-    if (p [0] != 0)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
-       stats [COLAMD_INFO1] = p [0] ;
-       DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ;
-       return (FALSE) ;
-    }
-    /* === If no knobs, set default knobs =================================== 
-    if (!knobs)
-    {
-       colamd_set_defaults (default_knobs) ;
-       knobs = default_knobs ;
-    }
-    /* === Allocate the Row and Col arrays from array A ===================== 
-    Col_size = COLAMD_C (n_col) ;
-    Row_size = COLAMD_R (n_row) ;
-    need = 2*nnz + n_col + Col_size + Row_size ;
-    if (need > Alen)
-    {
-       /* not enough space in array A to perform the ordering */
-       stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ;
-       stats [COLAMD_INFO1] = need ;
-       stats [COLAMD_INFO2] = Alen ;
-       DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen));
-       return (FALSE) ;
-    }
-    Alen -= Col_size + Row_size ;
-    Col = (Colamd_Col *) &A [Alen] ;
-    Row = (Colamd_Row *) &A [Alen + Col_size] ;
-    /* === Construct the row and column data structures ===================== 
-    if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats))
-    {
-       /* input matrix is invalid */
-       DEBUG0 (("colamd: Matrix invalid\n")) ;
-       return (FALSE) ;
-    }
-    /* === Initialize scores, kill dense rows/columns ======================= 
-    init_scoring (n_row, n_col, Row, Col, A, p, knobs,
-       &n_row2, &n_col2, &max_deg) ;
-    /* === Order the supercolumns =========================================== 
-    ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p,
-       n_col2, max_deg, 2*nnz) ;
-    /* === Order the non-principal columns ================================== 
-    order_children (n_col, Col, p) ;
-    /* === Return statistics in stats ======================================= 
-    stats [COLAMD_DENSE_ROW] = n_row - n_row2 ;
-    stats [COLAMD_DENSE_COL] = n_col - n_col2 ;
-    stats [COLAMD_DEFRAG_COUNT] = ngarbage ;
-    DEBUG0 (("colamd: done.\n")) ; 
-    return (TRUE) ;
-/* ========================================================================== 
-/* === colamd_report ======================================================== 
-/* ========================================================================== 
-PUBLIC void colamd_report
-    int stats [COLAMD_STATS]
-    print_report ("colamd", stats) ;
-/* ========================================================================== 
-/* === symamd_report ======================================================== 
-/* ========================================================================== 
-PUBLIC void symamd_report
-    int stats [COLAMD_STATS]
-    print_report ("symamd", stats) ;
-/* ========================================================================== 
-/* === NON-USER-CALLABLE ROUTINES: ========================================== 
-/* ========================================================================== 
-/* There are no user-callable routines beyond this point in the file */
-/* ========================================================================== 
-/* === init_rows_cols ======================================================= 
-/* ========================================================================== 
-    Takes the column form of the matrix in A and creates the row form of the
-    matrix.  Also, row and column attributes are stored in the Col and Row
-    structs.  If the columns are un-sorted or contain duplicate row indices,
-    this routine will also sort and remove duplicate row indices from the
-    column form of the matrix.  Returns FALSE if the matrix is invalid,
-    TRUE otherwise.  Not user-callable.
-PRIVATE int init_rows_cols     /* returns TRUE if OK, or FALSE otherwise */
-    /* === Parameters ======================================================= 
-    int n_row,                 /* number of rows of A */
-    int n_col,                 /* number of columns of A */
-    Colamd_Row Row [],         /* of size n_row+1 */
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int A [],                  /* row indices of A, of size Alen */
-    int p [],                  /* pointers to columns in A, of size n_col+1 */
-    int stats [COLAMD_STATS]   /* colamd statistics */ 
-    /* === Local variables ================================================== 
-    int col ;                  /* a column index */
-    int row ;                  /* a row index */
-    int *cp ;                  /* a column pointer */
-    int *cp_end ;              /* a pointer to the end of a column */
-    int *rp ;                  /* a row pointer */
-    int *rp_end ;              /* a pointer to the end of a row */
-    int last_row ;             /* previous row */
-    /* === Initialize columns, and check column pointers ==================== 
-    for (col = 0 ; col < n_col ; col++)
-    {
-       Col [col].start = p [col] ;
-       Col [col].length = p [col+1] - p [col] ;
-       if (Col [col].length < 0)
-       {
-           /* column pointers must be non-decreasing */
-           stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
-           stats [COLAMD_INFO1] = col ;
-           stats [COLAMD_INFO2] = Col [col].length ;
-           DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ;
-           return (FALSE) ;
-       }
-       Col [col].shared1.thickness = 1 ;
-       Col [col].shared2.score = 0 ;
-       Col [col].shared3.prev = EMPTY ;
-       Col [col].shared4.degree_next = EMPTY ;
-    }
-    /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
-    /* === Scan columns, compute row degrees, and check row indices ========= 
-    stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/
-    for (row = 0 ; row < n_row ; row++)
-    {
-       Row [row].length = 0 ;
-       Row [row].shared2.mark = -1 ;
-    }
-    for (col = 0 ; col < n_col ; col++)
-    {
-       last_row = -1 ;
-       cp = &A [p [col]] ;
-       cp_end = &A [p [col+1]] ;
-       while (cp < cp_end)
-       {
-           row = *cp++ ;
-           /* make sure row indices within range */
-           if (row < 0 || row >= n_row)
-           {
-               stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
-               stats [COLAMD_INFO1] = col ;
-               stats [COLAMD_INFO2] = row ;
-               stats [COLAMD_INFO3] = n_row ;
-               DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ;
-               return (FALSE) ;
-           }
-           if (row <= last_row || Row [row].shared2.mark == col)
-           {
-               /* row index are unsorted or repeated (or both), thus col */
-               /* is jumbled.  This is a notice, not an error condition. */
-               stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
-               stats [COLAMD_INFO1] = col ;
-               stats [COLAMD_INFO2] = row ;
-               (stats [COLAMD_INFO3]) ++ ;
-               DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col));
-           }
-           if (Row [row].shared2.mark != col)
-           {
-               Row [row].length++ ;
-           }
-           else
-           {
-               /* this is a repeated entry in the column, */
-               /* it will be removed */
-               Col [col].length-- ;
-           }
-           /* mark the row as having been seen in this column */
-           Row [row].shared2.mark = col ;
-           last_row = row ;
-       }
-    }
-    /* === Compute row pointers ============================================= 
-    /* row form of the matrix starts directly after the column */
-    /* form of matrix in A */
-    Row [0].start = p [n_col] ;
-    Row [0].shared1.p = Row [0].start ;
-    Row [0].shared2.mark = -1 ;
-    for (row = 1 ; row < n_row ; row++)
-    {
-       Row [row].start = Row [row-1].start + Row [row-1].length ;
-       Row [row].shared1.p = Row [row].start ;
-       Row [row].shared2.mark = -1 ;
-    }
-    /* === Create row form ================================================== 
-    {
-       /* if cols jumbled, watch for repeated row indices */
-       for (col = 0 ; col < n_col ; col++)
-       {
-           cp = &A [p [col]] ;
-           cp_end = &A [p [col+1]] ;
-           while (cp < cp_end)
-           {
-               row = *cp++ ;
-               if (Row [row].shared2.mark != col)
-               {
-                   A [(Row [row].shared1.p)++] = col ;
-                   Row [row].shared2.mark = col ;
-               }
-           }
-       }
-    }
-    else
-    {
-       /* if cols not jumbled, we don't need the mark (this is faster) */
-       for (col = 0 ; col < n_col ; col++)
-       {
-           cp = &A [p [col]] ;
-           cp_end = &A [p [col+1]] ;
-           while (cp < cp_end)
-           {
-               A [(Row [*cp++].shared1.p)++] = col ;
-           }
-       }
-    }
-    /* === Clear the row marks and set row degrees ========================== 
-    for (row = 0 ; row < n_row ; row++)
-    {
-       Row [row].shared2.mark = 0 ;
-       Row [row] = Row [row].length ;
-    }
-    /* === See if we need to re-create columns ============================== 
-    {
-       DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ;
-#ifndef NDEBUG
-       /* make sure column lengths are correct */
-       for (col = 0 ; col < n_col ; col++)
-       {
-           p [col] = Col [col].length ;
-       }
-       for (row = 0 ; row < n_row ; row++)
-       {
-           rp = &A [Row [row].start] ;
-           rp_end = rp + Row [row].length ;
-           while (rp < rp_end)
-           {
-               p [*rp++]-- ;
-           }
-       }
-       for (col = 0 ; col < n_col ; col++)
-       {
-           ASSERT (p [col] == 0) ;
-       }
-       /* now p is all zero (different than when debugging is turned off) */
-#endif /* NDEBUG */
-       /* === Compute col pointers ========================================= */
-       /* col form of the matrix starts at A [0]. */
-       /* Note, we may have a gap between the col form and the row */
-       /* form if there were duplicate entries, if so, it will be */
-       /* removed upon the first garbage collection */
-       Col [0].start = 0 ;
-       p [0] = Col [0].start ;
-       for (col = 1 ; col < n_col ; col++)
-       {
-           /* note that the lengths here are for pruned columns, i.e. */
-           /* no duplicate row indices will exist for these columns */
-           Col [col].start = Col [col-1].start + Col [col-1].length ;
-           p [col] = Col [col].start ;
-       }
-       /* === Re-create col form =========================================== */
-       for (row = 0 ; row < n_row ; row++)
-       {
-           rp = &A [Row [row].start] ;
-           rp_end = rp + Row [row].length ;
-           while (rp < rp_end)
-           {
-               A [(p [*rp++])++] = row ;
-           }
-       }
-    }
-    /* === Done.  Matrix is not (or no longer) jumbled ====================== 
-    return (TRUE) ;
-/* ========================================================================== 
-/* === init_scoring ========================================================= 
-/* ========================================================================== 
-    Kills dense or empty columns and rows, calculates an initial score for
-    each column, and places all columns in the degree lists.  Not 
-PRIVATE void init_scoring
-    /* === Parameters ======================================================= 
-    int n_row,                 /* number of rows of A */
-    int n_col,                 /* number of columns of A */
-    Colamd_Row Row [],         /* of size n_row+1 */
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int A [],                  /* column form and row form of A */
-    int head [],               /* of size n_col+1 */
-    double knobs [COLAMD_KNOBS],/* parameters */
-    int *p_n_row2,             /* number of non-dense, non-empty rows */
-    int *p_n_col2,             /* number of non-dense, non-empty columns */
-    int *p_max_deg             /* maximum row degree */
-    /* === Local variables ================================================== 
-    int c ;                    /* a column index */
-    int r, row ;               /* a row index */
-    int *cp ;                  /* a column pointer */
-    int deg ;                  /* degree of a row or column */
-    int *cp_end ;              /* a pointer to the end of a column */
-    int *new_cp ;              /* new column pointer */
-    int col_length ;           /* length of pruned column */
-    int score ;                        /* current column score */
-    int n_col2 ;               /* number of non-dense, non-empty columns */
-    int n_row2 ;               /* number of non-dense, non-empty rows */
-    int dense_row_count ;      /* remove rows with more entries than this */
-    int dense_col_count ;      /* remove cols with more entries than this */
-    int min_score ;            /* smallest column score */
-    int max_deg ;              /* maximum row degree */
-    int next_col ;             /* Used to add to degree list.*/
-#ifndef NDEBUG
-    int debug_count ;          /* debug only. */
-#endif /* NDEBUG */
-    /* === Extract knobs ==================================================== 
-    dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
-    dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
-    DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) 
-    max_deg = 0 ;
-    n_col2 = n_col ;
-    n_row2 = n_row ;
-    /* === Kill empty columns =============================================== 
-    /* Put the empty columns at the end in their natural order, so that LU */
-    /* factorization can proceed as far as possible. */
-    for (c = n_col-1 ; c >= 0 ; c--)
-    {
-       deg = Col [c].length ;
-       if (deg == 0)
-       {
-           /* this is a empty column, kill and order it last */
-           Col [c].shared2.order = --n_col2 ;
-           KILL_PRINCIPAL_COL (c) ;
-       }
-    }
-    DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ;
-    /* === Kill dense columns =============================================== 
-    /* Put the dense columns at the end, in their natural order */
-    for (c = n_col-1 ; c >= 0 ; c--)
-    {
-       /* skip any dead columns */
-       if (COL_IS_DEAD (c))
-       {
-           continue ;
-       }
-       deg = Col [c].length ;
-       if (deg > dense_col_count)
-       {
-           /* this is a dense column, kill and order it last */
-           Col [c].shared2.order = --n_col2 ;
-           /* decrement the row degrees */
-           cp = &A [Col [c].start] ;
-           cp_end = cp + Col [c].length ;
-           while (cp < cp_end)
-           {
-               Row [*cp++] ;
-           }
-           KILL_PRINCIPAL_COL (c) ;
-       }
-    }
-    DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ;
-    /* === Kill dense and empty rows ======================================== 
-    for (r = 0 ; r < n_row ; r++)
-    {
-       deg = Row [r] ;
-       ASSERT (deg >= 0 && deg <= n_col) ;
-       if (deg > dense_row_count || deg == 0)
-       {
-           /* kill a dense or empty row */
-           KILL_ROW (r) ;
-           --n_row2 ;
-       }
-       else
-       {
-           /* keep track of max degree of remaining rows */
-           max_deg = MAX (max_deg, deg) ;
-       }
-    }
-    DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
-    /* === Compute initial column scores ==================================== 
-    /* At this point the row degrees are accurate.  They reflect the number */
-    /* of "live" (non-dense) columns in each row.  No empty rows exist. */
-    /* Some "live" columns may contain only dead rows, however.  These are */
-    /* pruned in the code below. */
-    /* now find the initial matlab score for each column */
-    for (c = n_col-1 ; c >= 0 ; c--)
-    {
-       /* skip dead column */
-       if (COL_IS_DEAD (c))
-       {
-           continue ;
-       }
-       score = 0 ;
-       cp = &A [Col [c].start] ;
-       new_cp = cp ;
-       cp_end = cp + Col [c].length ;
-       while (cp < cp_end)
-       {
-           /* get a row */
-           row = *cp++ ;
-           /* skip if dead */
-           if (ROW_IS_DEAD (row))
-           {
-               continue ;
-           }
-           /* compact the column */
-           *new_cp++ = row ;
-           /* add row's external degree */
-           score += Row [row] - 1 ;
-           /* guard against integer overflow */
-           score = MIN (score, n_col) ;
-       }
-       /* determine pruned column length */
-       col_length = (int) (new_cp - &A [Col [c].start]) ;
-       if (col_length == 0)
-       {
-           /* a newly-made null column (all rows in this col are "dense" */
-           /* and have already been killed) */
-           DEBUG2 (("Newly null killed: %d\n", c)) ;
-           Col [c].shared2.order = --n_col2 ;
-           KILL_PRINCIPAL_COL (c) ;
-       }
-       else
-       {
-           /* set column length and set score */
-           ASSERT (score >= 0) ;
-           ASSERT (score <= n_col) ;
-           Col [c].length = col_length ;
-           Col [c].shared2.score = score ;
-       }
-    }
-    DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n",
-       n_col-n_col2)) ;
-    /* At this point, all empty rows and columns are dead.  All live columns */
-    /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
-    /* yet).  Rows may contain dead columns, but all live rows contain at */
-    /* least one live column. */
-#ifndef NDEBUG
-    debug_structures (n_row, n_col, Row, Col, A, n_col2) ;
-#endif /* NDEBUG */
-    /* === Initialize degree lists ========================================== 
-#ifndef NDEBUG
-    debug_count = 0 ;
-#endif /* NDEBUG */
-    /* clear the hash buckets */
-    for (c = 0 ; c <= n_col ; c++)
-    {
-       head [c] = EMPTY ;
-    }
-    min_score = n_col ;
-    /* place in reverse order, so low column indices are at the front */
-    /* of the lists.  This is to encourage natural tie-breaking */
-    for (c = n_col-1 ; c >= 0 ; c--)
-    {
-       /* only add principal columns to degree lists */
-       if (COL_IS_ALIVE (c))
-       {
-           DEBUG4 (("place %d score %d minscore %d ncol %d\n",
-               c, Col [c].shared2.score, min_score, n_col)) ;
-           /* === Add columns score to DList =============================== */
-           score = Col [c].shared2.score ;
-           ASSERT (min_score >= 0) ;
-           ASSERT (min_score <= n_col) ;
-           ASSERT (score >= 0) ;
-           ASSERT (score <= n_col) ;
-           ASSERT (head [score] >= EMPTY) ;
-           /* now add this column to dList at proper score location */
-           next_col = head [score] ;
-           Col [c].shared3.prev = EMPTY ;
-           Col [c].shared4.degree_next = next_col ;
-           /* if there already was a column with the same score, set its */
-           /* previous pointer to this new column */
-           if (next_col != EMPTY)
-           {
-               Col [next_col].shared3.prev = c ;
-           }
-           head [score] = c ;
-           /* see if this score is less than current min */
-           min_score = MIN (min_score, score) ;
-#ifndef NDEBUG
-           debug_count++ ;
-#endif /* NDEBUG */
-       }
-    }
-#ifndef NDEBUG
-    DEBUG1 (("colamd: Live cols %d out of %d, non-princ: %d\n",
-       debug_count, n_col, n_col-debug_count)) ;
-    ASSERT (debug_count == n_col2) ;
-    debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) 
-#endif /* NDEBUG */
-    /* === Return number of remaining columns, and max row degree =========== 
-    *p_n_col2 = n_col2 ;
-    *p_n_row2 = n_row2 ;
-    *p_max_deg = max_deg ;
-/* ========================================================================== 
-/* === find_ordering ======================================================== 
-/* ========================================================================== 
-    Order the principal columns of the supercolumn form of the matrix
-    (no supercolumns on input).  Uses a minimum approximate column minimum
-    degree ordering method.  Not user-callable.
-PRIVATE int find_ordering      /* return the number of garbage collections */
-    /* === Parameters ======================================================= 
-    int n_row,                 /* number of rows of A */
-    int n_col,                 /* number of columns of A */
-    int Alen,                  /* size of A, 2*nnz + n_col or larger */
-    Colamd_Row Row [],         /* of size n_row+1 */
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int A [],                  /* column form and row form of A */
-    int head [],               /* of size n_col+1 */
-    int n_col2,                        /* Remaining columns to order */
-    int max_deg,               /* Maximum row degree */
-    int pfree                  /* index of first free slot (2*nnz on entry) */
-    /* === Local variables ================================================== 
-    int k ;                    /* current pivot ordering step */
-    int pivot_col ;            /* current pivot column */
-    int *cp ;                  /* a column pointer */
-    int *rp ;                  /* a row pointer */
-    int pivot_row ;            /* current pivot row */
-    int *new_cp ;              /* modified column pointer */
-    int *new_rp ;              /* modified row pointer */
-    int pivot_row_start ;      /* pointer to start of pivot row */
-    int pivot_row_degree ;     /* number of columns in pivot row */
-    int pivot_row_length ;     /* number of supercolumns in pivot row */
-    int pivot_col_score ;      /* score of pivot column */
-    int needed_memory ;                /* free space needed for pivot row */
-    int *cp_end ;              /* pointer to the end of a column */
-    int *rp_end ;              /* pointer to the end of a row */
-    int row ;                  /* a row index */
-    int col ;                  /* a column index */
-    int max_score ;            /* maximum possible score */
-    int cur_score ;            /* score of current column */
-    unsigned int hash ;                /* hash value for supernode detection */
-    int head_column ;          /* head of hash bucket */
-    int first_col ;            /* first column in hash bucket */
-    int tag_mark ;             /* marker value for mark array */
-    int row_mark ;             /* Row [row].shared2.mark */
-    int set_difference ;       /* set difference size of row with pivot row */
-    int min_score ;            /* smallest column score */
-    int col_thickness ;                /* "thickness" (no. of columns in a 
supercol) */
-    int max_mark ;             /* maximum value of tag_mark */
-    int pivot_col_thickness ;  /* number of columns represented by pivot col */
-    int prev_col ;             /* Used by Dlist operations. */
-    int next_col ;             /* Used by Dlist operations. */
-    int ngarbage ;             /* number of garbage collections performed */
-#ifndef NDEBUG
-    int debug_d ;              /* debug loop counter */
-    int debug_step = 0 ;       /* debug loop counter */
-#endif /* NDEBUG */
-    /* === Initialization and clear mark ==================================== 
-    max_mark = INT_MAX - n_col ;       /* INT_MAX defined in <limits.h> */
-    tag_mark = clear_mark (n_row, Row) ;
-    min_score = 0 ;
-    ngarbage = 0 ;
-    DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ;
-    /* === Order the columns ================================================ 
-    for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
-    {
-#ifndef NDEBUG
-       if (debug_step % 100 == 0)
-       {
-           DEBUG2 (("\n...       Step k: %d out of n_col2: %d\n", k, n_col2)) ;
-       }
-       else
-       {
-           DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
-       }
-       debug_step++ ;
-       debug_deg_lists (n_row, n_col, Row, Col, head,
-               min_score, n_col2-k, max_deg) ;
-       debug_matrix (n_row, n_col, Row, Col, A) ;
-#endif /* NDEBUG */
-       /* === Select pivot column, and order it ============================ */
-       /* make sure degree list isn't empty */
-       ASSERT (min_score >= 0) ;
-       ASSERT (min_score <= n_col) ;
-       ASSERT (head [min_score] >= EMPTY) ;
-#ifndef NDEBUG
-       for (debug_d = 0 ; debug_d < min_score ; debug_d++)
-       {
-           ASSERT (head [debug_d] == EMPTY) ;
-       }
-#endif /* NDEBUG */
-       /* get pivot column from head of minimum degree list */
-       while (head [min_score] == EMPTY && min_score < n_col)
-       {
-           min_score++ ;
-       }
-       pivot_col = head [min_score] ;
-       ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;
-       next_col = Col [pivot_col].shared4.degree_next ;
-       head [min_score] = next_col ;
-       if (next_col != EMPTY)
-       {
-           Col [next_col].shared3.prev = EMPTY ;
-       }
-       ASSERT (COL_IS_ALIVE (pivot_col)) ;
-       DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
-       /* remember score for defrag check */
-       pivot_col_score = Col [pivot_col].shared2.score ;
-       /* the pivot column is the kth column in the pivot order */
-       Col [pivot_col].shared2.order = k ;
-       /* increment order count by column thickness */
-       pivot_col_thickness = Col [pivot_col].shared1.thickness ;
-       k += pivot_col_thickness ;
-       ASSERT (pivot_col_thickness > 0) ;
-       /* === Garbage_collection, if necessary ============================= */
-       needed_memory = MIN (pivot_col_score, n_col - k) ;
-       if (pfree + needed_memory >= Alen)
-       {
-           pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
-           ngarbage++ ;
-           /* after garbage collection we will have enough */
-           ASSERT (pfree + needed_memory < Alen) ;
-           /* garbage collection has wiped out the Row[].shared2.mark array */
-           tag_mark = clear_mark (n_row, Row) ;
-#ifndef NDEBUG
-           debug_matrix (n_row, n_col, Row, Col, A) ;
-#endif /* NDEBUG */
-       }
-       /* === Compute pivot row pattern ==================================== */
-       /* get starting location for this new merged row */
-       pivot_row_start = pfree ;
-       /* initialize new row counts to zero */
-       pivot_row_degree = 0 ;
-       /* tag pivot column as having been visited so it isn't included */
-       /* in merged pivot row */
-       Col [pivot_col].shared1.thickness = -pivot_col_thickness ;
-       /* pivot row is the union of all rows in the pivot column pattern */
-       cp = &A [Col [pivot_col].start] ;
-       cp_end = cp + Col [pivot_col].length ;
-       while (cp < cp_end)
-       {
-           /* get a row */
-           row = *cp++ ;
-           DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ;
-           /* skip if row is dead */
-           if (ROW_IS_DEAD (row))
-           {
-               continue ;
-           }
-           rp = &A [Row [row].start] ;
-           rp_end = rp + Row [row].length ;
-           while (rp < rp_end)
-           {
-               /* get a column */
-               col = *rp++ ;
-               /* add the column, if alive and untagged */
-               col_thickness = Col [col].shared1.thickness ;
-               if (col_thickness > 0 && COL_IS_ALIVE (col))
-               {
-                   /* tag column in pivot row */
-                   Col [col].shared1.thickness = -col_thickness ;
-                   ASSERT (pfree < Alen) ;
-                   /* place column in pivot row */
-                   A [pfree++] = col ;
-                   pivot_row_degree += col_thickness ;
-               }
-           }
-       }
-       /* clear tag on pivot column */
-       Col [pivot_col].shared1.thickness = pivot_col_thickness ;
-       max_deg = MAX (max_deg, pivot_row_degree) ;
-#ifndef NDEBUG
-       DEBUG3 (("check2\n")) ;
-       debug_mark (n_row, Row, tag_mark, max_mark) ;
-#endif /* NDEBUG */
-       /* === Kill all rows used to construct pivot row ==================== */
-       /* also kill pivot row, temporarily */
-       cp = &A [Col [pivot_col].start] ;
-       cp_end = cp + Col [pivot_col].length ;
-       while (cp < cp_end)
-       {
-           /* may be killing an already dead row */
-           row = *cp++ ;
-           DEBUG3 (("Kill row in pivot col: %d\n", row)) ;
-           KILL_ROW (row) ;
-       }
-       /* === Select a row index to use as the new pivot row =============== */
-       pivot_row_length = pfree - pivot_row_start ;
-       if (pivot_row_length > 0)
-       {
-           /* pick the "pivot" row arbitrarily (first row in col) */
-           pivot_row = A [Col [pivot_col].start] ;
-           DEBUG3 (("Pivotal row is %d\n", pivot_row)) ;
-       }
-       else
-       {
-           /* there is no pivot row, since it is of zero length */
-           pivot_row = EMPTY ;
-           ASSERT (pivot_row_length == 0) ;
-       }
-       ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
-       /* === Approximate degree computation =============================== */
-       /* Here begins the computation of the approximate degree.  The column */
-       /* score is the sum of the pivot row "length", plus the size of the */
-       /* set differences of each row in the column minus the pattern of the */
-       /* pivot row itself.  The column ("thickness") itself is also */
-       /* excluded from the column score (we thus use an approximate */
-       /* external degree). */
-       /* The time taken by the following code (compute set differences, and */
-       /* add them up) is proportional to the size of the data structure */
-       /* being scanned - that is, the sum of the sizes of each column in */
-       /* the pivot row.  Thus, the amortized time to compute a column score */
-       /* is proportional to the size of that column (where size, in this */
-       /* context, is the column "length", or the number of row indices */
-       /* in that column).  The number of row indices in a column is */
-       /* monotonically non-decreasing, from the length of the original */
-       /* column on input to colamd. */
-       /* === Compute set differences ====================================== */
-       DEBUG3 (("** Computing set differences phase. **\n")) ;
-       /* pivot row is currently dead - it will be revived later. */
-       DEBUG3 (("Pivot row: ")) ;
-       /* for each column in pivot row */
-       rp = &A [pivot_row_start] ;
-       rp_end = rp + pivot_row_length ;
-       while (rp < rp_end)
-       {
-           col = *rp++ ;
-           ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
-           DEBUG3 (("Col: %d\n", col)) ;
-           /* clear tags used to construct pivot row pattern */
-           col_thickness = -Col [col].shared1.thickness ;
-           ASSERT (col_thickness > 0) ;
-           Col [col].shared1.thickness = col_thickness ;
-           /* === Remove column from degree list =========================== */
-           cur_score = Col [col].shared2.score ;
-           prev_col = Col [col].shared3.prev ;
-           next_col = Col [col].shared4.degree_next ;
-           ASSERT (cur_score >= 0) ;
-           ASSERT (cur_score <= n_col) ;
-           ASSERT (cur_score >= EMPTY) ;
-           if (prev_col == EMPTY)
-           {
-               head [cur_score] = next_col ;
-           }
-           else
-           {
-               Col [prev_col].shared4.degree_next = next_col ;
-           }
-           if (next_col != EMPTY)
-           {
-               Col [next_col].shared3.prev = prev_col ;
-           }
-           /* === Scan the column ========================================== */
-           cp = &A [Col [col].start] ;
-           cp_end = cp + Col [col].length ;
-           while (cp < cp_end)
-           {
-               /* get a row */
-               row = *cp++ ;
-               row_mark = Row [row].shared2.mark ;
-               /* skip if dead */
-               if (ROW_IS_MARKED_DEAD (row_mark))
-               {
-                   continue ;
-               }
-               ASSERT (row != pivot_row) ;
-               set_difference = row_mark - tag_mark ;
-               /* check if the row has been seen yet */
-               if (set_difference < 0)
-               {
-                   ASSERT (Row [row] <= max_deg) ;
-                   set_difference = Row [row] ;
-               }
-               /* subtract column thickness from this row's set difference */
-               set_difference -= col_thickness ;
-               ASSERT (set_difference >= 0) ;
-               /* absorb this row if the set difference becomes zero */
-               if (set_difference == 0)
-               {
-                   DEBUG3 (("aggressive absorption. Row: %d\n", row)) ;
-                   KILL_ROW (row) ;
-               }
-               else
-               {
-                   /* save the new mark */
-                   Row [row].shared2.mark = set_difference + tag_mark ;
-               }
-           }
-       }
-#ifndef NDEBUG
-       debug_deg_lists (n_row, n_col, Row, Col, head,
-               min_score, n_col2-k-pivot_row_degree, max_deg) ;
-#endif /* NDEBUG */
-       /* === Add up set differences for each column ======================= */
-       DEBUG3 (("** Adding set differences phase. **\n")) ;
-       /* for each column in pivot row */
-       rp = &A [pivot_row_start] ;
-       rp_end = rp + pivot_row_length ;
-       while (rp < rp_end)
-       {
-           /* get a column */
-           col = *rp++ ;
-           ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
-           hash = 0 ;
-           cur_score = 0 ;
-           cp = &A [Col [col].start] ;
-           /* compact the column */
-           new_cp = cp ;
-           cp_end = cp + Col [col].length ;
-           DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ;
-           while (cp < cp_end)
-           {
-               /* get a row */
-               row = *cp++ ;
-               ASSERT(row >= 0 && row < n_row) ;
-               row_mark = Row [row].shared2.mark ;
-               /* skip if dead */
-               if (ROW_IS_MARKED_DEAD (row_mark))
-               {
-                   continue ;
-               }
-               ASSERT (row_mark > tag_mark) ;
-               /* compact the column */
-               *new_cp++ = row ;
-               /* compute hash function */
-               hash += row ;
-               /* add set difference */
-               cur_score += row_mark - tag_mark ;
-               /* integer overflow... */
-               cur_score = MIN (cur_score, n_col) ;
-           }
-           /* recompute the column's length */
-           Col [col].length = (int) (new_cp - &A [Col [col].start]) ;
-           /* === Further mass elimination ================================= */
-           if (Col [col].length == 0)
-           {
-               DEBUG4 (("further mass elimination. Col: %d\n", col)) ;
-               /* nothing left but the pivot row in this column */
-               KILL_PRINCIPAL_COL (col) ;
-               pivot_row_degree -= Col [col].shared1.thickness ;
-               ASSERT (pivot_row_degree >= 0) ;
-               /* order it */
-               Col [col].shared2.order = k ;
-               /* increment order count by column thickness */
-               k += Col [col].shared1.thickness ;
-           }
-           else
-           {
-               /* === Prepare for supercolumn detection ==================== */
-               DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ;
-               /* save score so far */
-               Col [col].shared2.score = cur_score ;
-               /* add column to hash table, for supercolumn detection */
-               hash %= n_col + 1 ;
-               DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
-               ASSERT (hash <= n_col) ;
-               head_column = head [hash] ;
-               if (head_column > EMPTY)
-               {
-                   /* degree list "hash" is non-empty, use prev (shared3) of */
-                   /* first column in degree list as head of hash bucket */
-                   first_col = Col [head_column].shared3.headhash ;
-                   Col [head_column].shared3.headhash = col ;
-               }
-               else
-               {
-                   /* degree list "hash" is empty, use head as hash bucket */
-                   first_col = - (head_column + 2) ;
-                   head [hash] = - (col + 2) ;
-               }
-               Col [col].shared4.hash_next = first_col ;
-               /* save hash function in Col [col].shared3.hash */
-               Col [col].shared3.hash = (int) hash ;
-               ASSERT (COL_IS_ALIVE (col)) ;
-           }
-       }
-       /* The approximate external column degree is now computed.  */
-       /* === Supercolumn detection ======================================== */
-       DEBUG3 (("** Supercolumn detection phase. **\n")) ;
-       detect_super_cols (
-#ifndef NDEBUG
-               n_col, Row,
-#endif /* NDEBUG */
-               Col, A, head, pivot_row_start, pivot_row_length) ;
-       /* === Kill the pivotal column ====================================== */
-       KILL_PRINCIPAL_COL (pivot_col) ;
-       /* === Clear mark =================================================== */
-       tag_mark += (max_deg + 1) ;
-       if (tag_mark >= max_mark)
-       {
-           DEBUG2 (("clearing tag_mark\n")) ;
-           tag_mark = clear_mark (n_row, Row) ;
-       }
-#ifndef NDEBUG
-       DEBUG3 (("check3\n")) ;
-       debug_mark (n_row, Row, tag_mark, max_mark) ;
-#endif /* NDEBUG */
-       /* === Finalize the new pivot row, and column scores ================ */
-       DEBUG3 (("** Finalize scores phase. **\n")) ;
-       /* for each column in pivot row */
-       rp = &A [pivot_row_start] ;
-       /* compact the pivot row */
-       new_rp = rp ;
-       rp_end = rp + pivot_row_length ;
-       while (rp < rp_end)
-       {
-           col = *rp++ ;
-           /* skip dead columns */
-           if (COL_IS_DEAD (col))
-           {
-               continue ;
-           }
-           *new_rp++ = col ;
-           /* add new pivot row to column */
-           A [Col [col].start + (Col [col].length++)] = pivot_row ;
-           /* retrieve score so far and add on pivot row's degree. */
-           /* (we wait until here for this in case the pivot */
-           /* row's degree was reduced due to mass elimination). */
-           cur_score = Col [col].shared2.score + pivot_row_degree ;
-           /* calculate the max possible score as the number of */
-           /* external columns minus the 'k' value minus the */
-           /* columns thickness */
-           max_score = n_col - k - Col [col].shared1.thickness ;
-           /* make the score the external degree of the union-of-rows */
-           cur_score -= Col [col].shared1.thickness ;
-           /* make sure score is less or equal than the max score */
-           cur_score = MIN (cur_score, max_score) ;
-           ASSERT (cur_score >= 0) ;
-           /* store updated score */
-           Col [col].shared2.score = cur_score ;
-           /* === Place column back in degree list ========================= */
-           ASSERT (min_score >= 0) ;
-           ASSERT (min_score <= n_col) ;
-           ASSERT (cur_score >= 0) ;
-           ASSERT (cur_score <= n_col) ;
-           ASSERT (head [cur_score] >= EMPTY) ;
-           next_col = head [cur_score] ;
-           Col [col].shared4.degree_next = next_col ;
-           Col [col].shared3.prev = EMPTY ;
-           if (next_col != EMPTY)
-           {
-               Col [next_col].shared3.prev = col ;
-           }
-           head [cur_score] = col ;
-           /* see if this score is less than current min */
-           min_score = MIN (min_score, cur_score) ;
-       }
-#ifndef NDEBUG
-       debug_deg_lists (n_row, n_col, Row, Col, head,
-               min_score, n_col2-k, max_deg) ;
-#endif /* NDEBUG */
-       /* === Resurrect the new pivot row ================================== */
-       if (pivot_row_degree > 0)
-       {
-           /* update pivot row length to reflect any cols that were killed */
-           /* during super-col detection and mass elimination */
-           Row [pivot_row].start  = pivot_row_start ;
-           Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ;
-           Row [pivot_row] = pivot_row_degree ;
-           Row [pivot_row].shared2.mark = 0 ;
-           /* pivot row is no longer dead */
-       }
-    }
-    /* === All principal columns have now been ordered ====================== 
-    return (ngarbage) ;
-/* ========================================================================== 
-/* === order_children ======================================================= 
-/* ========================================================================== 
-    The find_ordering routine has ordered all of the principal columns (the
-    representatives of the supercolumns).  The non-principal columns have not
-    yet been ordered.  This routine orders those columns by walking up the
-    parent tree (a column is a child of the column which absorbed it).  The
-    final permutation vector is then placed in p [0 ... n_col-1], with p [0]
-    being the first column, and p [n_col-1] being the last.  It doesn't look
-    like it at first glance, but be assured that this routine takes time linear
-    in the number of columns.  Although not immediately obvious, the time
-    taken by this routine is O (n_col), that is, linear in the number of
-    columns.  Not user-callable.
-PRIVATE void order_children
-    /* === Parameters ======================================================= 
-    int n_col,                 /* number of columns of A */
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int p []                   /* p [0 ... n_col-1] is the column permutation*/
-    /* === Local variables ================================================== 
-    int i ;                    /* loop counter for all columns */
-    int c ;                    /* column index */
-    int parent ;               /* index of column's parent */
-    int order ;                        /* column's order */
-    /* === Order each non-principal column ================================== 
-    for (i = 0 ; i < n_col ; i++)
-    {
-       /* find an un-ordered non-principal column */
-       ASSERT (COL_IS_DEAD (i)) ;
-       if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY)
-       {
-           parent = i ;
-           /* once found, find its principal parent */
-           do
-           {
-               parent = Col [parent].shared1.parent ;
-           } while (!COL_IS_DEAD_PRINCIPAL (parent)) ;
-           /* now, order all un-ordered non-principal columns along path */
-           /* to this parent.  collapse tree at the same time */
-           c = i ;
-           /* get order of parent */
-           order = Col [parent].shared2.order ;
-           do
-           {
-               ASSERT (Col [c].shared2.order == EMPTY) ;
-               /* order this column */
-               Col [c].shared2.order = order++ ;
-               /* collaps tree */
-               Col [c].shared1.parent = parent ;
-               /* get immediate parent of this column */
-               c = Col [c].shared1.parent ;
-               /* continue until we hit an ordered column.  There are */
-               /* guarranteed not to be anymore unordered columns */
-               /* above an ordered column */
-           } while (Col [c].shared2.order == EMPTY) ;
-           /* re-order the super_col parent to largest order for this group */
-           Col [parent].shared2.order = order ;
-       }
-    }
-    /* === Generate the permutation ========================================= 
-    for (c = 0 ; c < n_col ; c++)
-    {
-       p [Col [c].shared2.order] = c ;
-    }
-/* ========================================================================== 
-/* === detect_super_cols ==================================================== 
-/* ========================================================================== 
-    Detects supercolumns by finding matches between columns in the hash 
-    Check amongst columns in the set A [row_start ... row_start + 
-    The columns under consideration are currently *not* in the degree lists,
-    and have already been placed in the hash buckets.
-    The hash bucket for columns whose hash function is equal to h is stored
-    as follows:
-       if head [h] is >= 0, then head [h] contains a degree list, so:
-               head [h] is the first column in degree bucket h.
-               Col [head [h]].headhash gives the first column in hash bucket h.
-       otherwise, the degree list is empty, and:
-               -(head [h] + 2) is the first column in hash bucket h.
-    For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
-    column" pointer.  Col [c].shared3.hash is used instead as the hash number
-    for that column.  The value of Col [c].shared4.hash_next is the next column
-    in the same hash bucket.
-    Assuming no, or "few" hash collisions, the time taken by this routine is
-    linear in the sum of the sizes (lengths) of each column whose score has
-    just been computed in the approximate degree computation.
-    Not user-callable.
-PRIVATE void detect_super_cols
-    /* === Parameters ======================================================= 
-#ifndef NDEBUG
-    /* these two parameters are only needed when debugging is enabled: */
-    int n_col,                 /* number of columns of A */
-    Colamd_Row Row [],         /* of size n_row+1 */
-#endif /* NDEBUG */
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int A [],                  /* row indices of A */
-    int head [],               /* head of degree lists and hash buckets */
-    int row_start,             /* pointer to set of columns to check */
-    int row_length             /* number of columns to check */
-    /* === Local variables ================================================== 
-    int hash ;                 /* hash value for a column */
-    int *rp ;                  /* pointer to a row */
-    int c ;                    /* a column index */
-    int super_c ;              /* column index of the column to absorb into */
-    int *cp1 ;                 /* column pointer for column super_c */
-    int *cp2 ;                 /* column pointer for column c */
-    int length ;               /* length of column super_c */
-    int prev_c ;               /* column preceding c in hash bucket */
-    int i ;                    /* loop counter */
-    int *rp_end ;              /* pointer to the end of the row */
-    int col ;                  /* a column index in the row to check */
-    int head_column ;          /* first column in hash bucket or degree list */
-    int first_col ;            /* first column in hash bucket */
-    /* === Consider each column in the row ================================== 
-    rp = &A [row_start] ;
-    rp_end = rp + row_length ;
-    while (rp < rp_end)
-    {
-       col = *rp++ ;
-       if (COL_IS_DEAD (col))
-       {
-           continue ;
-       }
-       /* get hash number for this column */
-       hash = Col [col].shared3.hash ;
-       ASSERT (hash <= n_col) ;
-       /* === Get the first column in this hash bucket ===================== */
-       head_column = head [hash] ;
-       if (head_column > EMPTY)
-       {
-           first_col = Col [head_column].shared3.headhash ;
-       }
-       else
-       {
-           first_col = - (head_column + 2) ;
-       }
-       /* === Consider each column in the hash bucket ====================== */
-       for (super_c = first_col ; super_c != EMPTY ;
-           super_c = Col [super_c].shared4.hash_next)
-       {
-           ASSERT (COL_IS_ALIVE (super_c)) ;
-           ASSERT (Col [super_c].shared3.hash == hash) ;
-           length = Col [super_c].length ;
-           /* prev_c is the column preceding column c in the hash bucket */
-           prev_c = super_c ;
-           /* === Compare super_c with all columns after it ================ */
-           for (c = Col [super_c].shared4.hash_next ;
-                c != EMPTY ; c = Col [c].shared4.hash_next)
-           {
-               ASSERT (c != super_c) ;
-               ASSERT (COL_IS_ALIVE (c)) ;
-               ASSERT (Col [c].shared3.hash == hash) ;
-               /* not identical if lengths or scores are different */
-               if (Col [c].length != length ||
-                   Col [c].shared2.score != Col [super_c].shared2.score)
-               {
-                   prev_c = c ;
-                   continue ;
-               }
-               /* compare the two columns */
-               cp1 = &A [Col [super_c].start] ;
-               cp2 = &A [Col [c].start] ;
-               for (i = 0 ; i < length ; i++)
-               {
-                   /* the columns are "clean" (no dead rows) */
-                   ASSERT (ROW_IS_ALIVE (*cp1))  ;
-                   ASSERT (ROW_IS_ALIVE (*cp2))  ;
-                   /* row indices will same order for both supercols, */
-                   /* no gather scatter nessasary */
-                   if (*cp1++ != *cp2++)
-                   {
-                       break ;
-                   }
-               }
-               /* the two columns are different if the for-loop "broke" */
-               if (i != length)
-               {
-                   prev_c = c ;
-                   continue ;
-               }
-               /* === Got it!  two columns are identical =================== */
-               ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ;
-               Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
-               Col [c].shared1.parent = super_c ;
-               KILL_NON_PRINCIPAL_COL (c) ;
-               /* order c later, in order_children() */
-               Col [c].shared2.order = EMPTY ;
-               /* remove c from hash bucket */
-               Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;
-           }
-       }
-       /* === Empty this hash bucket ======================================= */
-       if (head_column > EMPTY)
-       {
-           /* corresponding degree list "hash" is not empty */
-           Col [head_column].shared3.headhash = EMPTY ;
-       }
-       else
-       {
-           /* corresponding degree list "hash" is empty */
-           head [hash] = EMPTY ;
-       }
-    }
-/* ========================================================================== 
-/* === garbage_collection =================================================== 
-/* ========================================================================== 
-    Defragments and compacts columns and rows in the workspace A.  Used when
-    all avaliable memory has been used while performing row merging.  Returns
-    the index of the first free position in A, after garbage collection.  The
-    time taken by this routine is linear is the size of the array A, which is
-    itself linear in the number of nonzeros in the input matrix.
-    Not user-callable.
-PRIVATE int garbage_collection  /* returns the new value of pfree */
-    /* === Parameters ======================================================= 
-    int n_row,                 /* number of rows */
-    int n_col,                 /* number of columns */
-    Colamd_Row Row [],         /* row info */
-    Colamd_Col Col [],         /* column info */
-    int A [],                  /* A [0 ... Alen-1] holds the matrix */
-    int *pfree                 /* &A [0] ... pfree is in use */
-    /* === Local variables ================================================== 
-    int *psrc ;                        /* source pointer */
-    int *pdest ;               /* destination pointer */
-    int j ;                    /* counter */
-    int r ;                    /* a row index */
-    int c ;                    /* a column index */
-    int length ;               /* length of a row or column */
-#ifndef NDEBUG
-    int debug_rows ;
-    DEBUG2 (("Defrag..\n")) ;
-    for (psrc = &A[0] ; psrc < pfree ; psrc++) ASSERT (*psrc >= 0) ;
-    debug_rows = 0 ;
-#endif /* NDEBUG */
-    /* === Defragment the columns =========================================== 
-    pdest = &A[0] ;
-    for (c = 0 ; c < n_col ; c++)
-    {
-       if (COL_IS_ALIVE (c))
-       {
-           psrc = &A [Col [c].start] ;
-           /* move and compact the column */
-           ASSERT (pdest <= psrc) ;
-           Col [c].start = (int) (pdest - &A [0]) ;
-           length = Col [c].length ;
-           for (j = 0 ; j < length ; j++)
-           {
-               r = *psrc++ ;
-               if (ROW_IS_ALIVE (r))
-               {
-                   *pdest++ = r ;
-               }
-           }
-           Col [c].length = (int) (pdest - &A [Col [c].start]) ;
-       }
-    }
-    /* === Prepare to defragment the rows =================================== 
-    for (r = 0 ; r < n_row ; r++)
-    {
-       if (ROW_IS_ALIVE (r))
-       {
-           if (Row [r].length == 0)
-           {
-               /* this row is of zero length.  cannot compact it, so kill it */
-               DEBUG3 (("Defrag row kill\n")) ;
-               KILL_ROW (r) ;
-           }
-           else
-           {
-               /* save first column index in Row [r].shared2.first_column */
-               psrc = &A [Row [r].start] ;
-               Row [r].shared2.first_column = *psrc ;
-               ASSERT (ROW_IS_ALIVE (r)) ;
-               /* flag the start of the row with the one's complement of row */
-               *psrc = ONES_COMPLEMENT (r) ;
-#ifndef NDEBUG
-               debug_rows++ ;
-#endif /* NDEBUG */
-           }
-       }
-    }
-    /* === Defragment the rows ============================================== 
-    psrc = pdest ;
-    while (psrc < pfree)
-    {
-       /* find a negative number ... the start of a row */
-       if (*psrc++ < 0)
-       {
-           psrc-- ;
-           /* get the row index */
-           r = ONES_COMPLEMENT (*psrc) ;
-           ASSERT (r >= 0 && r < n_row) ;
-           /* restore first column index */
-           *psrc = Row [r].shared2.first_column ;
-           ASSERT (ROW_IS_ALIVE (r)) ;
-           /* move and compact the row */
-           ASSERT (pdest <= psrc) ;
-           Row [r].start = (int) (pdest - &A [0]) ;
-           length = Row [r].length ;
-           for (j = 0 ; j < length ; j++)
-           {
-               c = *psrc++ ;
-               if (COL_IS_ALIVE (c))
-               {
-                   *pdest++ = c ;
-               }
-           }
-           Row [r].length = (int) (pdest - &A [Row [r].start]) ;
-#ifndef NDEBUG
-           debug_rows-- ;
-#endif /* NDEBUG */
-       }
-    }
-    /* ensure we found all the rows */
-    ASSERT (debug_rows == 0) ;
-    /* === Return the new value of pfree ==================================== 
-    return ((int) (pdest - &A [0])) ;
-/* ========================================================================== 
-/* === clear_mark =========================================================== 
-/* ========================================================================== 
-    Clears the Row [].shared2.mark array, and returns the new tag_mark.
-    Return value is the new tag_mark.  Not user-callable.
-PRIVATE int clear_mark /* return the new value for tag_mark */
-    /* === Parameters ======================================================= 
-    int n_row,         /* number of rows in A */
-    Colamd_Row Row []  /* Row [0 ... n_row-1].shared2.mark is set to zero */
-    /* === Local variables ================================================== 
-    int r ;
-    for (r = 0 ; r < n_row ; r++)
-    {
-       if (ROW_IS_ALIVE (r))
-       {
-           Row [r].shared2.mark = 0 ;
-       }
-    }
-    return (1) ;
-/* ========================================================================== 
-/* === print_report ========================================================= 
-/* ========================================================================== 
-PRIVATE void print_report
-    char *method,
-    int stats [COLAMD_STATS]
-    int i1, i2, i3 ;
-    if (!stats)
-    {
-       PRINTF ("%s: No statistics available.\n", method) ;
-       return ;
-    }
-    i1 = stats [COLAMD_INFO1] ;
-    i2 = stats [COLAMD_INFO2] ;
-    i3 = stats [COLAMD_INFO3] ;
-    if (stats [COLAMD_STATUS] >= 0)
-    {
-       PRINTF ("%s: OK.  ", method) ;
-    }
-    else
-    {
-       PRINTF ("%s: ERROR.  ", method) ;
-    }
-    switch (stats [COLAMD_STATUS])
-    {
-           PRINTF ("Matrix has unsorted or duplicate row indices.\n") ;
-           PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n",
-           method, i3) ;
-           PRINTF ("%s: last seen duplicate or out-of-order row index:   %d\n",
-           method, INDEX (i2)) ;
-           PRINTF ("%s: last seen in column:                             %d",
-           method, INDEX (i1)) ;
-           /* no break - fall through to next case instead */
-       case COLAMD_OK:
-           PRINTF ("\n") ;
-           PRINTF ("%s: number of dense or empty rows ignored:           %d\n",
-           method, stats [COLAMD_DENSE_ROW]) ;
-           PRINTF ("%s: number of dense or empty columns ignored:        %d\n",
-           method, stats [COLAMD_DENSE_COL]) ;
-           PRINTF ("%s: number of garbage collections performed:         %d\n",
-           method, stats [COLAMD_DEFRAG_COUNT]) ;
-           break ;
-       case COLAMD_ERROR_A_not_present:
-           PRINTF ("Array A (row indices of matrix) not present.\n") ;
-           break ;
-       case COLAMD_ERROR_p_not_present:
-           PRINTF ("Array p (column pointers for matrix) not present.\n") ;
-           break ;
-       case COLAMD_ERROR_nrow_negative:
-           PRINTF ("Invalid number of rows (%d).\n", i1) ;
-           break ;
-       case COLAMD_ERROR_ncol_negative:
-           PRINTF ("Invalid number of columns (%d).\n", i1) ;
-           break ;
-       case COLAMD_ERROR_nnz_negative:
-           PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ;
-           break ;
-       case COLAMD_ERROR_p0_nonzero:
-           PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ;
-           break ;
-       case COLAMD_ERROR_A_too_small:
-           PRINTF ("Array A too small.\n") ;
-           PRINTF ("        Need Alen >= %d, but given only Alen = %d.\n",
-           i1, i2) ;
-           break ;
-       case COLAMD_ERROR_col_length_negative:
-           PRINTF
-           ("Column %d has a negative number of nonzero entries (%d).\n",
-           INDEX (i1), i2) ;
-           break ;
-       case COLAMD_ERROR_row_index_out_of_bounds:
-           PRINTF
-           ("Row index (row %d) out of bounds (%d to %d) in column %d.\n",
-           INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ;
-           break ;
-       case COLAMD_ERROR_out_of_memory:
-           PRINTF ("Out of memory.\n") ;
-           break ;
-       case COLAMD_ERROR_internal_error:
-           /* if this happens, there is a bug in the code */
-           PRINTF
-           ("Internal error! Please contact authors (\n") ;
-           break ;
-    }
-/* ========================================================================== 
-/* === colamd debugging routines ============================================ 
-/* ========================================================================== 
-/* When debugging is disabled, the remainder of this file is ignored. */
-#ifndef NDEBUG
-/* ========================================================================== 
-/* === debug_structures ===================================================== 
-/* ========================================================================== 
-    At this point, all empty rows and columns are dead.  All live columns
-    are "clean" (containing no dead rows) and simplicial (no supercolumns
-    yet).  Rows may contain dead columns, but all live rows contain at
-    least one live column.
-PRIVATE void debug_structures
-    /* === Parameters ======================================================= 
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int n_col2
-    /* === Local variables ================================================== 
-    int i ;
-    int c ;
-    int *cp ;
-    int *cp_end ;
-    int len ;
-    int score ;
-    int r ;
-    int *rp ;
-    int *rp_end ;
-    int deg ;
-    /* === Check A, Row, and Col ============================================ 
-    for (c = 0 ; c < n_col ; c++)
-    {
-       if (COL_IS_ALIVE (c))
-       {
-           len = Col [c].length ;
-           score = Col [c].shared2.score ;
-           DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ;
-           ASSERT (len > 0) ;
-           ASSERT (score >= 0) ;
-           ASSERT (Col [c].shared1.thickness == 1) ;
-           cp = &A [Col [c].start] ;
-           cp_end = cp + len ;
-           while (cp < cp_end)
-           {
-               r = *cp++ ;
-               ASSERT (ROW_IS_ALIVE (r)) ;
-           }
-       }
-       else
-       {
-           i = Col [c].shared2.order ;
-           ASSERT (i >= n_col2 && i < n_col) ;
-       }
-    }
-    for (r = 0 ; r < n_row ; r++)
-    {
-       if (ROW_IS_ALIVE (r))
-       {
-           i = 0 ;
-           len = Row [r].length ;
-           deg = Row [r] ;
-           ASSERT (len > 0) ;
-           ASSERT (deg > 0) ;
-           rp = &A [Row [r].start] ;
-           rp_end = rp + len ;
-           while (rp < rp_end)
-           {
-               c = *rp++ ;
-               if (COL_IS_ALIVE (c))
-               {
-                   i++ ;
-               }
-           }
-           ASSERT (i > 0) ;
-       }
-    }
-/* ========================================================================== 
-/* === debug_deg_lists ====================================================== 
-/* ========================================================================== 
-    Prints the contents of the degree lists.  Counts the number of columns
-    in the degree list and compares it to the total it should have.  Also
-    checks the row degrees.
-PRIVATE void debug_deg_lists
-    /* === Parameters ======================================================= 
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int head [],
-    int min_score,
-    int should,
-    int max_deg
-    /* === Local variables ================================================== 
-    int deg ;
-    int col ;
-    int have ;
-    int row ;
-    /* === Check the degree lists =========================================== 
-    if (n_col > 10000 && colamd_debug <= 0)
-    {
-       return ;
-    }
-    have = 0 ;
-    DEBUG4 (("Degree lists: %d\n", min_score)) ;
-    for (deg = 0 ; deg <= n_col ; deg++)
-    {
-       col = head [deg] ;
-       if (col == EMPTY)
-       {
-           continue ;
-       }
-       DEBUG4 (("%d:", deg)) ;
-       while (col != EMPTY)
-       {
-           DEBUG4 ((" %d", col)) ;
-           have += Col [col].shared1.thickness ;
-           ASSERT (COL_IS_ALIVE (col)) ;
-           col = Col [col].shared4.degree_next ;
-       }
-       DEBUG4 (("\n")) ;
-    }
-    DEBUG4 (("should %d have %d\n", should, have)) ;
-    ASSERT (should == have) ;
-    /* === Check the row degrees ============================================ 
-    if (n_row > 10000 && colamd_debug <= 0)
-    {
-       return ;
-    }
-    for (row = 0 ; row < n_row ; row++)
-    {
-       if (ROW_IS_ALIVE (row))
-       {
-           ASSERT (Row [row] <= max_deg) ;
-       }
-    }
-/* ========================================================================== 
-/* === debug_mark =========================================================== 
-/* ========================================================================== 
-    Ensures that the tag_mark is less that the maximum and also ensures that
-    each entry in the mark array is less than the tag mark.
-PRIVATE void debug_mark
-    /* === Parameters ======================================================= 
-    int n_row,
-    Colamd_Row Row [],
-    int tag_mark,
-    int max_mark
-    /* === Local variables ================================================== 
-    int r ;
-    /* === Check the Row marks ============================================== 
-    ASSERT (tag_mark > 0 && tag_mark <= max_mark) ;
-    if (n_row > 10000 && colamd_debug <= 0)
-    {
-       return ;
-    }
-    for (r = 0 ; r < n_row ; r++)
-    {
-       ASSERT (Row [r].shared2.mark < tag_mark) ;
-    }
-/* ========================================================================== 
-/* === debug_matrix ========================================================= 
-/* ========================================================================== 
-    Prints out the contents of the columns and the rows.
-PRIVATE void debug_matrix
-    /* === Parameters ======================================================= 
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A []
-    /* === Local variables ================================================== 
-    int r ;
-    int c ;
-    int *rp ;
-    int *rp_end ;
-    int *cp ;
-    int *cp_end ;
-    /* === Dump the rows and columns of the matrix ========================== 
-    if (colamd_debug < 3)
-    {
-       return ;
-    }
-    DEBUG3 (("DUMP MATRIX:\n")) ;
-    for (r = 0 ; r < n_row ; r++)
-    {
-       DEBUG3 (("Row %d alive? %d\n", r, ROW_IS_ALIVE (r))) ;
-       if (ROW_IS_DEAD (r))
-       {
-           continue ;
-       }
-       DEBUG3 (("start %d length %d degree %d\n",
-               Row [r].start, Row [r].length, Row [r] ;
-       rp = &A [Row [r].start] ;
-       rp_end = rp + Row [r].length ;
-       while (rp < rp_end)
-       {
-           c = *rp++ ;
-           DEBUG4 (("  %d col %d\n", COL_IS_ALIVE (c), c)) ;
-       }
-    }
-    for (c = 0 ; c < n_col ; c++)
-    {
-       DEBUG3 (("Col %d alive? %d\n", c, COL_IS_ALIVE (c))) ;
-       if (COL_IS_DEAD (c))
-       {
-           continue ;
-       }
-       DEBUG3 (("start %d length %d shared1 %d shared2 %d\n",
-               Col [c].start, Col [c].length,
-               Col [c].shared1.thickness, Col [c].shared2.score)) ;
-       cp = &A [Col [c].start] ;
-       cp_end = cp + Col [c].length ;
-       while (cp < cp_end)
-       {
-           r = *cp++ ;
-           DEBUG4 (("  %d row %d\n", ROW_IS_ALIVE (r), r)) ;
-       }
-    }
-PRIVATE void colamd_get_debug
-    char *method
-    colamd_debug = 0 ;         /* no debug printing */
-    /* get "D" environment variable, which gives the debug printing level */
-    if (getenv ("D"))
-    {
-       colamd_debug = atoi (getenv ("D")) ;
-    }
-    DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n",
-       method, colamd_debug)) ;
-#endif /* NDEBUG */
diff --git a/superlu/colamd.h b/superlu/colamd.h
deleted file mode 100644
index 6e30662a..00000000
--- a/superlu/colamd.h
+++ /dev/null
@@ -1,246 +0,0 @@
-/* ========================================================================== 
-/* === colamd/symamd prototypes and definitions ============================= 
-/* ========================================================================== 
-    You must include this file (colamd.h) in any routine that uses colamd,
-    symamd, or the related macros and definitions.
-    Authors:
-       The authors of the code itself are Stefan I. Larimore and Timothy A.
-       Davis (, University of Florida.  The algorithm was
-       developed in collaboration with John Gilbert, Xerox PARC, and Esmond
-       Ng, Oak Ridge National Laboratory.
-    Date:
-       September 8, 2003.  Version 2.3.
-    Acknowledgements:
-       This work was supported by the National Science Foundation, under
-       grants DMS-9504974 and DMS-9803599.
-    Notice:
-       Copyright (c) 1998-2003 by the University of Florida.
-       All Rights Reserved.
-       Permission is hereby granted to use, copy, modify, and/or distribute
-       this program, provided that the Copyright, this License, and the
-       Availability of the original version is retained on all copies and made
-       accessible to the end-user of any code or package that includes COLAMD
-       or any modified version of COLAMD. 
-    Availability:
-       The colamd/symamd library is available at
-       This is the
-       file.  It is required by the colamd.c, colamdmex.c, and symamdmex.c
-       files, and by any C code that calls the routines whose prototypes are
-       listed below, or that uses the colamd/symamd definitions listed below.
-#ifndef COLAMD_H
-#define COLAMD_H
-/* ========================================================================== 
-/* === Include files ======================================================== 
-/* ========================================================================== 
-#include <stdlib.h>
-/* ========================================================================== 
-/* === Knob and statistics definitions ====================================== 
-/* ========================================================================== 
-/* size of the knobs [ ] array.  Only knobs [0..1] are currently used. */
-#define COLAMD_KNOBS 20
-/* number of output statistics.  Only stats [0..6] are currently used. */
-#define COLAMD_STATS 20
-/* knobs [0] and stats [0]: dense row knob and output statistic. */
-/* knobs [1] and stats [1]: dense column knob and output statistic. */
-/* stats [2]: memory defragmentation count output statistic */
-/* stats [3]: colamd status:  zero OK, > 0 warning or notice, < 0 error */
-#define COLAMD_STATUS 3
-/* stats [4..6]: error info, or info on jumbled columns */ 
-#define COLAMD_INFO1 4
-#define COLAMD_INFO2 5
-#define COLAMD_INFO3 6
-/* error codes returned in stats [3]: */
-#define COLAMD_OK                              (0)
-#define COLAMD_OK_BUT_JUMBLED                  (1)
-#define COLAMD_ERROR_A_not_present             (-1)
-#define COLAMD_ERROR_p_not_present             (-2)
-#define COLAMD_ERROR_nrow_negative             (-3)
-#define COLAMD_ERROR_ncol_negative             (-4)
-#define COLAMD_ERROR_nnz_negative              (-5)
-#define COLAMD_ERROR_p0_nonzero                        (-6)
-#define COLAMD_ERROR_A_too_small               (-7)
-#define COLAMD_ERROR_col_length_negative       (-8)
-#define COLAMD_ERROR_row_index_out_of_bounds   (-9)
-#define COLAMD_ERROR_out_of_memory             (-10)
-#define COLAMD_ERROR_internal_error            (-999)
-/* ========================================================================== 
-/* === Row and Column structures ============================================ 
-/* ========================================================================== 
-/* User code that makes use of the colamd/symamd routines need not directly */
-/* reference these structures.  They are used only for the COLAMD_RECOMMENDED 
-/* macro. */
-typedef struct Colamd_Col_struct
-    int start ;                /* index for A of first row in this column, or 
-                       /* if column is dead */
-    int length ;       /* number of rows in this column */
-    union
-    {
-       int thickness ; /* number of original columns represented by this */
-                       /* col, if the column is alive */
-       int parent ;    /* parent in parent tree super-column structure, if */
-                       /* the column is dead */
-    } shared1 ;
-    union
-    {
-       int score ;     /* the score used to maintain heap, if col is alive */
-       int order ;     /* pivot ordering of this column, if col is dead */
-    } shared2 ;
-    union
-    {
-       int headhash ;  /* head of a hash bucket, if col is at the head of */
-                       /* a degree list */
-       int hash ;      /* hash value, if col is not in a degree list */
-       int prev ;      /* previous column in degree list, if col is in a */
-                       /* degree list (but not at the head of a degree list) */
-    } shared3 ;
-    union
-    {
-       int degree_next ;       /* next column, if col is in a degree list */
-       int hash_next ;         /* next column, if col is in a hash list */
-    } shared4 ;
-} Colamd_Col ;
-typedef struct Colamd_Row_struct
-    int start ;                /* index for A of first col in this row */
-    int length ;       /* number of principal columns in this row */
-    union
-    {
-       int degree ;    /* number of principal & non-principal columns in row */
-       int p ;         /* used as a row pointer in init_rows_cols () */
-    } shared1 ;
-    union
-    {
-       int mark ;      /* for computing set differences and marking dead rows*/
-       int first_column ;/* first column in row (used in garbage collection) */
-    } shared2 ;
-} Colamd_Row ;
-/* ========================================================================== 
-/* === Colamd recommended memory size ======================================= 
-/* ========================================================================== 
-    The recommended length Alen of the array A passed to colamd is given by
-    the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro.  It returns -1 if any
-    argument is negative.  2*nnz space is required for the row and column
-    indices of the matrix. COLAMD_C (n_col) + COLAMD_R (n_row) space is
-    required for the Col and Row arrays, respectively, which are internal to
-    colamd.  An additional n_col space is the minimal amount of "elbow room",
-    and nnz/5 more space is recommended for run time efficiency.
-    This macro is not needed when using symamd.
-    Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid
-    gcc -pedantic warning messages.
-#define COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (Colamd_Col) / sizeof 
-#define COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (Colamd_Row) / sizeof 
-#define COLAMD_RECOMMENDED(nnz, n_row, n_col)                                 \
-(                                                                             \
-((nnz) < 0 || (n_row) < 0 || (n_col) < 0)                                     \
-?                                                                             \
-    (-1)                                                                      \
-:                                                                             \
-    (2 * (nnz) + COLAMD_C (n_col) + COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \
-/* ========================================================================== 
-/* === Prototypes of user-callable routines ================================= 
-/* ========================================================================== 
-int colamd_recommended         /* returns recommended value of Alen, */
-                               /* or (-1) if input arguments are erroneous */
-    int nnz,                   /* nonzeros in A */
-    int n_row,                 /* number of rows in A */
-    int n_col                  /* number of columns in A */
-) ;
-void colamd_set_defaults       /* sets default parameters */
-(                              /* knobs argument is modified on output */
-    double knobs [COLAMD_KNOBS]        /* parameter settings for colamd */
-) ;
-int colamd                     /* returns (1) if successful, (0) otherwise*/
-(                              /* A and p arguments are modified on output */
-    int n_row,                 /* number of rows in A */
-    int n_col,                 /* number of columns in A */
-    int Alen,                  /* size of the array A */
-    int A [],                  /* row indices of A, of size Alen */
-    int p [],                  /* column pointers of A, of size n_col+1 */
-    double knobs [COLAMD_KNOBS],/* parameter settings for colamd */
-    int stats [COLAMD_STATS]   /* colamd output statistics and error codes */
-) ;
-int symamd                             /* return (1) if OK, (0) otherwise */
-    int n,                             /* number of rows and columns of A */
-    int A [],                          /* row indices of A */
-    int p [],                          /* column pointers of A */
-    int perm [],                       /* output permutation, size n_col+1 */
-    double knobs [COLAMD_KNOBS],       /* parameters (uses defaults if NULL) */
-    int stats [COLAMD_STATS],          /* output statistics and error codes */
-    void * (*allocate) (size_t, size_t),
-                                       /* pointer to calloc (ANSI C) or */
-                                       /* mxCalloc (for MATLAB mexFunction) */
-    void (*release) (void *)
-                                       /* pointer to free (ANSI C) or */
-                                       /* mxFree (for MATLAB mexFunction) */
-) ;
-void colamd_report
-    int stats [COLAMD_STATS]
-) ;
-void symamd_report
-    int stats [COLAMD_STATS]
-) ;
-#endif /* COLAMD_H */
diff --git a/superlu/cpanel_bmod.c b/superlu/cpanel_bmod.c
deleted file mode 100644
index b899953b..00000000
--- a/superlu/cpanel_bmod.c
+++ /dev/null
@@ -1,478 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_cdefs.h"
-extern void ctrsv_();
-extern void cgemv_();
- * Function prototypes 
- */
-void clsolve(int, int, complex *, complex *);
-void cmatvec(int, int, int, complex *, complex *, complex *);
-extern void ccheck_tempv();
-cpanel_bmod (
-           const int  m,          /* in - number of rows in the matrix */
-           const int  w,          /* in */
-           const int  jcol,       /* in */
-           const int  nseg,       /* in */
-           complex     *dense,     /* out, of size n by w */
-           complex     *tempv,     /* working array */
-           int        *segrep,    /* in */
-           int        *repfnz,    /* in, of size n by w */
-           GlobalLU_t *Glu,       /* modified */
-           SuperLUStat_t *stat    /* output */
-           )
- * Purpose
- * =======
- *
- *    Performs numeric block updates (sup-panel) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- *    Before entering this routine, the original nonzeros in the panel 
- *    were already copied into the spa[m,w].
- *
- *    Updated/Output parameters-
- *     dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
- *      collectively in the m-by-w vector dense[*]. 
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-    int          incx = 1, incy = 1;
-    complex       alpha, beta;
-    register int k, ksub;
-    int          fsupc, nsupc, nsupr, nrow;
-    int          krep, krep_ind;
-    complex       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          segsze;
-    int          block_nrow;  /* no of rows in a block row */
-    register int lptr;       /* Points to the row subscripts of a supernode */
-    int          kfnz, irow, no_zeros; 
-    register int isub, isub1, i;
-    register int jj;         /* Index through each column in the panel */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    complex       *lusup;
-    int          *xlusup;
-    int          *repfnz_col; /* repfnz[] for a column in the panel */
-    complex       *dense_col;  /* dense[] for a column in the panel */
-    complex       *tempv1;             /* Used in 1-D update */
-    complex       *TriTmp, *MatvecTmp; /* used in 2-D update */
-    complex      zero = {0.0, 0.0};
-    complex      one = {1.0, 0.0};
-    complex      comp_temp, comp_temp1;
-    register int ldaTmp;
-    register int r_ind, r_hi;
-    static   int first = 1, maxsuper, rowblk, colblk;
-    flops_t  *ops = stat->ops;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       rowblk   = sp_ienv(4);
-       colblk   = sp_ienv(5);
-       first = 0;
-    }
-    ldaTmp = maxsuper + rowblk;
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
-       /* krep = representative of current k-th supernode
-        * fsupc = first supernodal column
-        * nsupc = no of columns in a supernode
-        * nsupr = no of rows in a supernode
-        */
-        krep = segrep[k--];
-       fsupc = xsup[supno[krep]];
-       nsupc = krep - fsupc + 1;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nrow = nsupr - nsupc;
-       lptr = xlsub[fsupc];
-       krep_ind = lptr + nsupc - 1;
-       repfnz_col = repfnz;
-       dense_col = dense;
-       if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
-           TriTmp = tempv;
-           /* Sequence through each column in panel -- triangular solves */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-               ops[TRSV] += 4 * segsze * (segsze - 1);
-               ops[GEMV] += 8 * nrow * segsze;
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       ++luptr;
-                   }
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   luptr1 = luptr - nsupr;
-                   if ( segsze == 2 ) {
-                       cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       c_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++;
-                           cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           c_sub(&dense_col[irow], &dense_col[irow], 
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                       c_sub(&ukj1, &ukj1, &comp_temp);
-                       cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       c_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++; luptr2++;
-                           cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           c_sub(&dense_col[irow], &dense_col[irow], 
-                       }
-                   }
-               } else  {       /* segsze >= 4 */
-                   /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
-                      holds the result of triangular solves.    */
-                   no_zeros = kfnz - fsupc;
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       TriTmp[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-#ifdef _CRAY
-                   CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-                   ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-                   clsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
-               } /* else ... */
-           }  /* for jj ... end tri-solves */
-           /* Block row updates; push all the way into dense[*] block */
-           for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
-               r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
-               block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
-               luptr = xlusup[fsupc] + nsupc + r_ind;
-               isub1 = lptr + nsupc + r_ind;
-               repfnz_col = repfnz;
-               TriTmp = tempv;
-               dense_col = dense;
-               /* Sequence through each column in panel -- matrix-vector */
-               for (jj = jcol; jj < jcol + w; jj++,
-                    repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-                   kfnz = repfnz_col[krep];
-                   if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-                   segsze = krep - kfnz + 1;
-                   if ( segsze <= 3 ) continue;   /* skip unrolled cases */
-                   /* Perform a block update, and scatter the result of
-                      matrix-vector to dense[].                 */
-                   no_zeros = kfnz - fsupc;
-                   luptr1 = luptr + nsupr * no_zeros;
-                   MatvecTmp = &TriTmp[maxsuper];
-                   alpha = one; 
-                    beta = zero;
-#ifdef _CRAY
-                   CGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-                   cgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-                   cmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
-                          TriTmp, MatvecTmp);
-                   /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
-                    * such that MatvecTmp[*] can be re-used for the
-                    * the next blok row update. dense[] will be copied into 
-                    * global store after the whole panel has been finished.
-                    */
-                   isub = isub1;
-                   for (i = 0; i < block_nrow; i++) {
-                       irow = lsub[isub];
-                       c_sub(&dense_col[irow], &dense_col[irow], 
-                              &MatvecTmp[i]);
-                       MatvecTmp[i] = zero;
-                       ++isub;
-                   }
-               } /* for jj ... */
-           } /* for each block row ... */
-           /* Scatter the triangular solves into SPA dense[*] */
-           repfnz_col = repfnz;
-           TriTmp = tempv;
-           dense_col = dense;
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               if ( segsze <= 3 ) continue; /* skip unrolled cases */
-               no_zeros = kfnz - fsupc;                
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   dense_col[irow] = TriTmp[i];
-                   TriTmp[i] = zero;
-                   ++isub;
-               }
-           } /* for jj ... */
-       } else { /* 1-D block modification */
-           /* Sequence through each column in the panel */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-               ops[TRSV] += 4 * segsze * (segsze - 1);
-               ops[GEMV] += 8 * nrow * segsze;
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       ++luptr;
-                   }
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr1 = luptr - nsupr;
-                   if ( segsze == 2 ) {
-                       cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       c_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr;  ++luptr1;
-                           cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           c_sub(&dense_col[irow], &dense_col[irow], 
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                       c_sub(&ukj1, &ukj1, &comp_temp);
-                       cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       c_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr; ++luptr1; ++luptr2;
-                           cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           c_sub(&dense_col[irow], &dense_col[irow], 
-                       }
-                   }
-               } else  { /* segsze >= 4 */
-                   /* 
-                    * Perform a triangular solve and block update,
-                    * then scatter the result of sup-col update to dense[].
-                    */
-                   no_zeros = kfnz - fsupc;
-                   /* Copy U[*,j] segment from dense[*] to tempv[*]: 
-                    *    The result of triangular solve is in tempv[*];
-                    *    The result of matrix vector update is in dense_col[*]
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       tempv[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-#ifdef _CRAY
-                   CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-                   ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-                   luptr += segsze;    /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                    alpha = one;
-                    beta = zero;
-#ifdef _CRAY
-                   CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-                   cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-                   clsolve ( nsupr, segsze, &lusup[luptr], tempv );
-                   luptr += segsze;        /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                   cmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
-                   /* Scatter tempv[*] into SPA dense[*] temporarily, such
-                    * that tempv[*] can be used for the triangular solve of
-                    * the next column of the panel. They will be copied into 
-                    * ucol[*] after the whole panel has been finished.
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] = tempv[i];
-                       tempv[i] = zero;
-                       isub++;
-                   }
-                   /* Scatter the update from tempv1[*] into SPA dense[*] */
-                   /* Start dense rectangular L */
-                   for (i = 0; i < nrow; i++) {
-                       irow = lsub[isub];
-                       c_sub(&dense_col[irow], &dense_col[irow], &tempv1[i]);
-                       tempv1[i] = zero;
-                       ++isub; 
-                   }
-               } /* else segsze>=4 ... */
-           } /* for each column in the panel... */
-       } /* else 1-D update ... */
-    } /* for each updating supernode ... */
diff --git a/superlu/cpanel_dfs.c b/superlu/cpanel_dfs.c
deleted file mode 100644
index 8d7b9835..00000000
--- a/superlu/cpanel_dfs.c
+++ /dev/null
@@ -1,256 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_cdefs.h"
-cpanel_dfs (
-          const int  m,           /* in - number of rows in the matrix */
-          const int  w,           /* in */
-          const int  jcol,        /* in */
-          SuperMatrix *A,       /* in - original matrix */
-          int        *perm_r,     /* in */
-          int        *nseg,       /* out */
-          complex     *dense,      /* out */
-          int        *panel_lsub, /* out */
-          int        *segrep,     /* out */
-          int        *repfnz,     /* out */
-          int        *xprune,     /* out */
-          int        *marker,     /* out */     
-          int        *parent,     /* working array */
-          int        *xplore,     /* working array */
-          GlobalLU_t *Glu         /* modified */
-          )
- * Purpose
- * =======
- *
- *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives.
- *
- *   The routine returns one list of the supernodal representatives
- *   in topological order of the dfs that generates them. This list is
- *   a superset of the topological order of each individual column within
- *   the panel. 
- *   The location of the first nonzero in each supernodal segment
- *   (supernodal entry location) is also returned. Each column has a 
- *   separate list for this purpose.
- *
- *   Two marker arrays are used for dfs:
- *     marker[i] == jj, if i was visited during dfs of current column jj;
- *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
- *
- *   marker: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- */
-    NCPformat *Astore;
-    complex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int              krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
-    int       k, krow, kmark, kperm;
-    int       xdfs, maxdfs, kpar;
-    int       jj;         /* index through each column in the panel */
-    int       *marker1;           /* marker1[jj] >= jcol if vertex jj was 
-                             by a previous column within this panel.   */
-    int       *repfnz_col; /* start of each column in the panel */
-    complex    *dense_col;  /* start of each column in the panel */
-    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    /* Initialize pointers */
-    Astore     = A->Store;
-    a          = Astore->nzval;
-    asub       = Astore->rowind;
-    xa_begin   = Astore->colbeg;
-    xa_end     = Astore->colend;
-    marker1    = marker + m;
-    repfnz_col = repfnz;
-    dense_col  = dense;
-    *nseg      = 0;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    /* For each column in the panel */
-    for (jj = jcol; jj < jcol + w; jj++) {
-       nextl_col = (jj - jcol) * m;
-#ifdef CHK_DFS
-       printf("\npanel col %d: ", jj);
-       /* For each nonz in A[*,jj] do dfs */
-       for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
-           krow = asub[k];
-            dense_col[krow] = a[k];
-           kmark = marker[krow];       
-           if ( kmark == jj ) 
-               continue;     /* krow visited before, go to the next nonzero */
-           /* For each unmarked nbr krow of jj
-            * krow is in L: place it in structure of L[*,jj]
-            */
-           marker[krow] = jj;
-           kperm = perm_r[krow];
-           if ( kperm == EMPTY ) {
-               panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
-           }
-           /* 
-            * krow is in U: if its supernode-rep krep
-            * has been explored, update repfnz[*]
-            */
-           else {
-               krep = xsup[supno[kperm]+1] - 1;
-               myfnz = repfnz_col[krep];
-#ifdef CHK_DFS
-               printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, 
-               if ( myfnz != EMPTY ) { /* Representative visited before */
-                   if ( myfnz > kperm ) repfnz_col[krep] = kperm;
-                   /* continue; */
-               }
-               else {
-                   /* Otherwise, perform dfs starting at krep */
-                   oldrep = EMPTY;
-                   parent[krep] = oldrep;
-                   repfnz_col[krep] = kperm;
-                   xdfs = xlsub[krep];
-                   maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                   printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
-                   for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                   printf("\n");
-                   do {
-                       /* 
-                        * For each unmarked kchild of krep 
-                        */
-                       while ( xdfs < maxdfs ) {
-                           kchild = lsub[xdfs];
-                           xdfs++;
-                           chmark = marker[kchild];
-                           if ( chmark != jj ) { /* Not reached yet */
-                               marker[kchild] = jj;
-                               chperm = perm_r[kchild];
-                               /* Case kchild is in L: place it in L[*,j] */
-                               if ( chperm == EMPTY ) {
-                                   panel_lsub[nextl_col++] = kchild;
-                               } 
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               else {
-                                   chrep = xsup[supno[chperm]+1] - 1;
-                                   myfnz = repfnz_col[chrep];
-#ifdef CHK_DFS
-                                   printf("chrep %d,myfnz %d,perm_r[%d] 
-                                   if ( myfnz != EMPTY ) { /* Visited before */
-                                       if ( myfnz > chperm )
-                                           repfnz_col[chrep] = chperm;
-                                   }
-                                   else {
-                                       /* Cont. dfs at snode-rep of kchild */
-                                       xplore[krep] = xdfs;    
-                                       oldrep = krep;
-                                       krep = chrep; /* Go deeper down G(L) */
-                                       parent[krep] = oldrep;
-                                       repfnz_col[krep] = chperm;
-                                       xdfs = xlsub[krep];     
-                                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                                       printf("  xdfs %d, maxdfs %d: ", xdfs, 
-                                       for (i = xdfs; i < maxdfs; i++) 
printf(" %d", lsub[i]); 
-                                       printf("\n");
-                                   } /* else */
-                               } /* else */
-                           } /* if... */
-                       } /* while xdfs < maxdfs */
-                       /* krow has no more unexplored nbrs:
-                        *    Place snode-rep krep in postorder DFS, if this 
-                        *    segment is seen for the first time. (Note that
-                        *    "repfnz[krep]" may change later.)
-                        *    Backtrack dfs to its parent.
-                        */
-                       if ( marker1[krep] < jcol ) {
-                           segrep[*nseg] = krep;
-                           ++(*nseg);
-                           marker1[krep] = jj;
-                       }
-                       kpar = parent[krep]; /* Pop stack, mimic recursion */
-                       if ( kpar == EMPTY ) break; /* dfs done */
-                       krep = kpar;
-                       xdfs = xplore[krep];
-                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                       printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", 
-                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                       printf("\n");
-                   } while ( kpar != EMPTY ); /* do-while - until empty stack 
-               } /* else */
-           } /* else */
-       } /* for each nonz in A[*,jj] */
-       repfnz_col += m;    /* Move to next column */
-        dense_col += m;
-    } /* for jj ... */
diff --git a/superlu/cpivotL.c b/superlu/cpivotL.c
deleted file mode 100644
index 006be071..00000000
--- a/superlu/cpivotL.c
+++ /dev/null
@@ -1,171 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <math.h>
-#include <stdlib.h>
-#include "slu_cdefs.h"
-#undef DEBUG
-        const int  jcol,     /* in */
-        const float u,      /* in - diagonal pivoting threshold */
-        int        *usepr,   /* re-use the pivot sequence given by 
perm_r/iperm_r */
-        int        *perm_r,  /* may be modified */
-        int        *iperm_r, /* in - inverse of perm_r */
-        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
-        int        *pivrow,  /* out */
-        GlobalLU_t *Glu,     /* modified - global LU data structures */
-       SuperLUStat_t *stat  /* output */
-       )
- * Purpose
- * =======
- *   Performs the numerical pivoting on the current column of L,
- *   and the CDIV operation.
- *
- *   Pivot policy:
- *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
- *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
- *           pivot row = k;
- *       ELSE IF abs(A_jj) >= thresh THEN
- *           pivot row = j;
- *       ELSE
- *           pivot row = m;
- * 
- *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
- *
- *   Return value: 0      success;
- *                 i > 0  U(i,i) is exactly zero.
- *
- */
-    complex one = {1.0, 0.0};
-    int          fsupc;            /* first column in the supernode */
-    int          nsupc;            /* no of columns in the supernode */
-    int          nsupr;     /* no of rows in the supernode */
-    int          lptr;     /* points to the starting subscript of the 
supernode */
-    int          pivptr, old_pivptr, diag, diagind;
-    float       pivmax, rtemp, thresh;
-    complex       temp;
-    complex       *lu_sup_ptr; 
-    complex       *lu_col_ptr;
-    int          *lsub_ptr;
-    int          isub, icol, k, itemp;
-    int          *lsub, *xlsub;
-    complex       *lusup;
-    int          *xlusup;
-    flops_t      *ops = stat->ops;
-    /* Initialize pointers */
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
-    nsupc      = jcol - fsupc;         /* excluding jcol; nsupc >= 0 */
-    lptr       = xlsub[fsupc];
-    nsupr      = xlsub[fsupc+1] - lptr;
-    lu_sup_ptr = &lusup[xlusup[fsupc]];        /* start of the current 
supernode */
-    lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
-    lsub_ptr   = &lsub[lptr];  /* start of row indices of the supernode */
-#ifdef DEBUG
-if ( jcol == MIN_COL ) {
-    printf("Before cdiv: col %d\n", jcol);
-    for (k = nsupc; k < nsupr; k++) 
-       printf("  lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
-    /* Determine the largest abs numerical value for partial pivoting;
-       Also search for user-specified pivot, and diagonal element. */
-    if ( *usepr ) *pivrow = iperm_r[jcol];
-    diagind = iperm_c[jcol];
-    pivmax = 0.0;
-    pivptr = nsupc;
-    diag = EMPTY;
-    old_pivptr = nsupc;
-    for (isub = nsupc; isub < nsupr; ++isub) {
-        rtemp = c_abs1 (&lu_col_ptr[isub]);
-       if ( rtemp > pivmax ) {
-           pivmax = rtemp;
-           pivptr = isub;
-       }
-       if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
-       if ( lsub_ptr[isub] == diagind ) diag = isub;
-    }
-    /* Test for singularity */
-    if ( pivmax == 0.0 ) {
-       *pivrow = lsub_ptr[pivptr];
-       perm_r[*pivrow] = jcol;
-       *usepr = 0;
-       return (jcol+1);
-    }
-    thresh = u * pivmax;
-    /* Choose appropriate pivotal element by our policy. */
-    if ( *usepr ) {
-        rtemp = c_abs1 (&lu_col_ptr[old_pivptr]);
-       if ( rtemp != 0.0 && rtemp >= thresh )
-           pivptr = old_pivptr;
-       else
-           *usepr = 0;
-    }
-    if ( *usepr == 0 ) {
-       /* Use diagonal pivot? */
-       if ( diag >= 0 ) { /* diagonal exists */
-            rtemp = c_abs1 (&lu_col_ptr[diag]);
-           if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
-        }
-       *pivrow = lsub_ptr[pivptr];
-    }
-    /* Record pivot row */
-    perm_r[*pivrow] = jcol;
-    /* Interchange row subscripts */
-    if ( pivptr != nsupc ) {
-       itemp = lsub_ptr[pivptr];
-       lsub_ptr[pivptr] = lsub_ptr[nsupc];
-       lsub_ptr[nsupc] = itemp;
-       /* Interchange numerical values as well, for the whole snode, such 
-        * that L is indexed the same way as A.
-        */
-       for (icol = 0; icol <= nsupc; icol++) {
-           itemp = pivptr + icol * nsupr;
-           temp = lu_sup_ptr[itemp];
-           lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
-           lu_sup_ptr[nsupc + icol*nsupr] = temp;
-       }
-    } /* if */
-    /* cdiv operation */
-    ops[FACT] += 10 * (nsupr - nsupc);
-    c_div(&temp, &one, &lu_col_ptr[nsupc]);
-    for (k = nsupc+1; k < nsupr; k++) 
-       cc_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp);
-    return 0;
diff --git a/superlu/cpivotgrowth.c b/superlu/cpivotgrowth.c
deleted file mode 100644
index c1dd72a8..00000000
--- a/superlu/cpivotgrowth.c
+++ /dev/null
@@ -1,130 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <math.h>
-#include "slu_cdefs.h"
-cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
-             SuperMatrix *L, SuperMatrix *U)
- * Purpose
- * =======
- *
- * Compute the reciprocal pivot growth factor of the leading ncols columns
- * of the matrix, using the formula:
- *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
- *
- * Arguments
- * =========
- *
- * ncols    (input) int
- *          The number of columns of matrices A, L and U.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = NC; Dtype = SLU_C; Mtype = GE.
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SC; Dtype = SLU_C; Mtype = TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = NC;
- *          Dtype = SLU_C; Mtype = TRU.
- *
- */
-    NCformat *Astore;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    complex  *Aval, *Lval, *Uval;
-    int      fsupc, nsupr, luptr, nz_in_U;
-    int      i, j, k, oldcol;
-    int      *inv_perm_c;
-    float   rpg, maxaj, maxuj;
-    extern   double slamch_(char *);
-    float   smlnum;
-    complex   *luval;
-    complex   temp_comp;
-    /* Get machine constants. */
-    smlnum = slamch_("S");
-    rpg = 1. / smlnum;
-    Astore = A->Store;
-    Lstore = L->Store;
-    Ustore = U->Store;
-    Aval = Astore->nzval;
-    Lval = Lstore->nzval;
-    Uval = Ustore->nzval;
-    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
-    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;
-    for (k = 0; k <= Lstore->nsuper; ++k) {
-       fsupc = L_FST_SUPC(k);
-       nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-       luptr = L_NZ_START(fsupc);
-       luval = &Lval[luptr];
-       nz_in_U = 1;
-       for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
-           maxaj = 0.;
-            oldcol = inv_perm_c[j];
-           for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
-               maxaj = SUPERLU_MAX( maxaj, c_abs1( &Aval[i]) );
-           maxuj = 0.;
-           for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
-               maxuj = SUPERLU_MAX( maxuj, c_abs1( &Uval[i]) );
-           /* Supernode */
-           for (i = 0; i < nz_in_U; ++i)
-               maxuj = SUPERLU_MAX( maxuj, c_abs1( &luval[i]) );
-           ++nz_in_U;
-           luval += nsupr;
-           if ( maxuj == 0. )
-               rpg = SUPERLU_MIN( rpg, 1.);
-           else
-               rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
-       }
-       if ( j >= ncols ) break;
-    }
-    SUPERLU_FREE(inv_perm_c);
-    return (rpg);
diff --git a/superlu/cpruneL.c b/superlu/cpruneL.c
deleted file mode 100644
index f43617f3..00000000
--- a/superlu/cpruneL.c
+++ /dev/null
@@ -1,156 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_cdefs.h"
-       const int  jcol,             /* in */
-       const int  *perm_r,   /* in */
-       const int  pivrow,    /* in */
-       const int  nseg,             /* in */
-       const int  *segrep,   /* in */
-       const int  *repfnz,   /* in */
-       int        *xprune,   /* out */
-       GlobalLU_t *Glu       /* modified - global LU data structures */
-       )
- * Purpose
- * =======
- *   Prunes the L-structure of supernodes whose L-structure
- *   contains the current pivot row "pivrow"
- *
- */
-    complex     utemp;
-    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
-    int        i, ktemp, minloc, maxloc;
-    int        do_prune; /* logical variable */
-    int        *xsup, *supno;
-    int        *lsub, *xlsub;
-    complex     *lusup;
-    int        *xlusup;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    /*
-     * For each supernode-rep irep in U[*,j]
-     */
-    jsupno = supno[jcol];
-    for (i = 0; i < nseg; i++) {
-       irep = segrep[i];
-       irep1 = irep + 1;
-       do_prune = FALSE;
-       /* Don't prune with a zero U-segment */
-       if ( repfnz[irep] == EMPTY )
-               continue;
-       /* If a snode overlaps with the next panel, then the U-segment 
-        * is fragmented into two parts -- irep and irep1. We should let
-        * pruning occur at the rep-column in irep1's snode. 
-        */
-       if ( supno[irep] == supno[irep1] )      /* Don't prune */
-               continue;
-       /*
-        * If it has not been pruned & it has a nonz in row L[pivrow,i]
-        */
-       if ( supno[irep] != jsupno ) {
-           if ( xprune[irep] >= xlsub[irep1] ) {
-               kmin = xlsub[irep];
-               kmax = xlsub[irep1] - 1;
-               for (krow = kmin; krow <= kmax; krow++) 
-                   if ( lsub[krow] == pivrow ) {
-                       do_prune = TRUE;
-                       break;
-                   }
-           }
-           if ( do_prune ) {
-               /* Do a quicksort-type partition
-                * movnum=TRUE means that the num values have to be exchanged.
-                */
-               movnum = FALSE;
-               if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
-                       movnum = TRUE;
-               while ( kmin <= kmax ) {
-                   if ( perm_r[lsub[kmax]] == EMPTY ) 
-                       kmax--;
-                   else if ( perm_r[lsub[kmin]] != EMPTY )
-                       kmin++;
-                   else { /* kmin below pivrow, and kmax above pivrow: 
-                           *   interchange the two subscripts
-                           */
-                       ktemp = lsub[kmin];
-                       lsub[kmin] = lsub[kmax];
-                       lsub[kmax] = ktemp;
-                       /* If the supernode has only one column, then we
-                        * only keep one set of subscripts. For any subscript 
-                        * interchange performed, similar interchange must be 
-                        * done on the numerical values.
-                        */
-                       if ( movnum ) {
-                           minloc = xlusup[irep] + (kmin - xlsub[irep]);
-                           maxloc = xlusup[irep] + (kmax - xlsub[irep]);
-                           utemp = lusup[minloc];
-                           lusup[minloc] = lusup[maxloc];
-                           lusup[maxloc] = utemp;
-                       }
-                       kmin++;
-                       kmax--;
-                   }
-               } /* while */
-               xprune[irep] = kmin;    /* Pruning */
-#ifdef CHK_PRUNE
-       printf("    After cpruneL(),using col %d:  xprune[%d] = %d\n", 
-                       jcol, irep, kmin);
-           } /* if do_prune */
-       } /* if */
-    } /* for each U-segment... */
diff --git a/superlu/creadhb.c b/superlu/creadhb.c
deleted file mode 100644
index 47572075..00000000
--- a/superlu/creadhb.c
+++ /dev/null
@@ -1,288 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_cdefs.h"
-/* Eat up the rest of the current line */
-int cDumpLine(FILE *fp)
-    register int c;
-    while ((c = fgetc(fp)) != '\n') ;
-    return 0;
-int cParseIntFormat(char *buf, int *num, int *size)
-    char *tmp;
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    sscanf(tmp, "%d", num);
-    while (*tmp != 'I' && *tmp != 'i') ++tmp;
-    ++tmp;
-    sscanf(tmp, "%d", size);
-    return 0;
-int cParseFloatFormat(char *buf, int *num, int *size)
-    char *tmp, *period;
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-    while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd'
-          && *tmp != 'F' && *tmp != 'f') {
-        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
-           num picked up refers to P, which should be skipped. */
-        if (*tmp=='p' || *tmp=='P') {
-           ++tmp;
-           *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-        } else {
-           ++tmp;
-        }
-    }
-    ++tmp;
-    period = tmp;
-    while (*period != '.' && *period != ')') ++period ;
-    *period = '\0';
-    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/
-    return 0;
-int cReadVector(FILE *fp, int n, int *where, int perline, int persize)
-    register int i, j, item;
-    char tmp, buf[100], *dummy;
-    dummy = 0;
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           item = atoi(&buf[j*persize]); 
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-           where[i++] = item - 1;
-       }
-    }
-    return 0;
-/* Read complex numbers as pairs of (real, imaginary) */
-int cReadValues(FILE *fp, int n, complex *destination, int perline, int 
-  register int i, j, k, s, pair;
-  register float realpart;
-  char tmp, buf[100], *dummy;
-  i = pair = 0;
-  while (i < n) {
-    dummy = fgets(buf, 100, fp);    /* read a line at a time */
-    for (j=0; j<perline && i<n; j++) {
-      tmp = buf[(j+1)*persize];     /* save the char at that place */
-      buf[(j+1)*persize] = 0;       /* null terminate */
-      s = j*persize;
-      for (k = 0; k < persize; ++k) /* No D_ format in C */
-        if ( buf[s+k] == 'D' || buf[s+k] == 'd' ) buf[s+k] = 'E';
-      if ( pair == 0 ) {
-        /* The value is real part */
-        realpart = atof(&buf[s]);
-        pair = 1;
-      } else {
-        /* The value is imaginary part */
-        destination[i].r = realpart;
-        destination[i++].i = atof(&buf[s]);
-        pair = 0;
-      }
-      buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-    }
-  }
-  return 0;
-creadhb(int *nrow, int *ncol, int *nonz,
-       complex **nzval, int **rowind, int **colptr)
- * Purpose
- * =======
- * 
- * Read a COMPLEX PRECISION matrix stored in Harwell-Boeing format 
- * as described below.
- * 
- * Line 1 (A72,A8) 
- *     Col. 1 - 72   Title (TITLE) 
- *     Col. 73 - 80  Key (KEY) 
- * 
- * Line 2 (5I14) 
- *     Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
- *     Col. 15 - 28  Number of lines for pointers (PTRCRD) 
- *     Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
- *     Col. 43 - 56  Number of lines for numerical values (VALCRD) 
- *     Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
- *                    (including starting guesses and solution vectors 
- *                    if present) 
- *                   (zero indicates no right-hand side data is present) 
- *
- * Line 3 (A3, 11X, 4I14) 
- *     Col. 1 - 3    Matrix type (see below) (MXTYPE) 
- *     Col. 15 - 28  Number of rows (or variables) (NROW) 
- *     Col. 29 - 42  Number of columns (or elements) (NCOL) 
- *     Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
- *                   (equal to number of entries for assembled matrices) 
- *     Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
- *                   (zero in the case of assembled matrices) 
- * Line 4 (2A16, 2A20) 
- *     Col. 1 - 16   Format for pointers (PTRFMT) 
- *     Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
- *     Col. 33 - 52  Format for numerical values of coefficient matrix 
- *     Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
- *
- * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
- *     Col. 1        Right-hand side type: 
- *                       F for full storage or M for same format as matrix 
- *     Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
- *     Col. 3        X if an exact solution vector(s) is supplied. 
- *     Col. 15 - 28  Number of right-hand sides (NRHS) 
- *     Col. 29 - 42  Number of row indices (NRHSIX) 
- *                   (ignored in case of unassembled matrices) 
- *
- * The three character type field on line 3 describes the matrix type. 
- * The following table lists the permitted values for each of the three 
- * characters. As an example of the type field, RSA denotes that the matrix 
- * is real, symmetric, and assembled. 
- *
- * First Character: 
- *     R Real matrix 
- *     C Complex matrix 
- *     P Pattern only (no numerical values supplied) 
- *
- * Second Character: 
- *     S Symmetric 
- *     U Unsymmetric 
- *     H Hermitian 
- *     Z Skew symmetric 
- *     R Rectangular 
- *
- * Third Character: 
- *     A Assembled 
- *     E Elemental matrices (unassembled) 
- *
- */
-    register int i, numer_lines = 0, rhscrd = 0;
-    int tmp, colnum, colsize, rownum, rowsize, valnum, valsize, dummy;
-    char buf[100], type[4], key[10], *dummyc;
-    FILE *fp;
-    dummy = 0;
-    fp = stdin;
-    /* Line 1 */
-    dummyc = fgets(buf, 100, fp);
-    dummy += fputs(buf, stdout);
-#if 0
-    dummy += fscanf(fp, "%72c", buf); buf[72] = 0;
-    printf("Title: %s", buf);
-    dummy += fscanf(fp, "%8c", key);  key[8] = 0;
-    printf("Key: %s\n", key);
-    cDumpLine(fp);
-    /* Line 2 */
-    for (i=0; i<5; i++) {
-       dummy += fscanf(fp, "%14c", buf); buf[14] = 0;
-       sscanf(buf, "%d", &tmp);
-       if (i == 3) numer_lines = tmp;
-       if (i == 4 && tmp) rhscrd = tmp;
-    }
-    cDumpLine(fp);
-    /* Line 3 */
-    dummy += fscanf(fp, "%3c", type);
-    dummy += fscanf(fp, "%11c", buf); /* pad */
-    type[3] = 0;
-#ifdef DEBUG
-    printf("Matrix type %s\n", type);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nrow);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", ncol);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nonz);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", &tmp);
-    if (tmp != 0)
-         printf("This is not an assembled matrix!\n");
-    if (*nrow != *ncol)
-       printf("Matrix is not square.\n");
-    cDumpLine(fp);
-    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
-    callocateA(*ncol, *nonz, nzval, rowind, colptr);
-    /* Line 4: format statement */
-    dummy += fscanf(fp, "%16c", buf);
-    cParseIntFormat(buf, &colnum, &colsize);
-    dummy += fscanf(fp, "%16c", buf);
-    cParseIntFormat(buf, &rownum, &rowsize);
-    dummy += fscanf(fp, "%20c", buf);
-    cParseFloatFormat(buf, &valnum, &valsize);
-    dummy += fscanf(fp, "%20c", buf);
-    cDumpLine(fp);
-    /* Line 5: right-hand side */    
-    if ( rhscrd ) cDumpLine(fp); /* skip RHSFMT */
-#ifdef DEBUG
-    printf("%d rows, %d nonzeros\n", *nrow, *nonz);
-    printf("colnum %d, colsize %d\n", colnum, colsize);
-    printf("rownum %d, rowsize %d\n", rownum, rowsize);
-    printf("valnum %d, valsize %d\n", valnum, valsize);
-    cReadVector(fp, *ncol+1, *colptr, colnum, colsize);
-    cReadVector(fp, *nonz, *rowind, rownum, rowsize);
-    if ( numer_lines ) {
-        cReadValues(fp, *nonz, *nzval, valnum, valsize);
-    }
-    fclose(fp);
diff --git a/superlu/csnode_bmod.c b/superlu/csnode_bmod.c
deleted file mode 100644
index 8c4812c6..00000000
--- a/superlu/csnode_bmod.c
+++ /dev/null
@@ -1,117 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_cdefs.h"
-extern void ctrsv_();
-extern void cgemv_();
- * Performs numeric block updates within the relaxed snode. 
- */
-csnode_bmod (
-           const int  jcol,      /* in */
-           const int  jsupno,    /* in */
-           const int  fsupc,     /* in */
-           complex     *dense,    /* in */
-           complex     *tempv,    /* working array */
-           GlobalLU_t *Glu,      /* modified */
-           SuperLUStat_t *stat   /* output */
-           )
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-    int            incx = 1, incy = 1;
-    complex         alpha = {-1.0, 0.0},  beta = {1.0, 0.0};
-    complex   comp_zero = {0.0, 0.0};
-    int            luptr, nsupc, nsupr, nrow;
-    int            isub, irow, i, iptr; 
-    register int   ufirst, nextlu;
-    int            *lsub, *xlsub;
-    complex         *lusup;
-    int            *xlusup;
-    flops_t *ops = stat->ops;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nextlu = xlusup[jcol];
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = comp_zero;
-       ++nextlu;
-    }
-    xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
-    if ( fsupc < jcol ) {
-       luptr = xlusup[fsupc];
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nsupc = jcol - fsupc;   /* Excluding jcol */
-       ufirst = xlusup[jcol];  /* Points to the beginning of column
-                                  jcol in supernode L\U(jsupno). */
-       nrow = nsupr - nsupc;
-       ops[TRSV] += 4 * nsupc * (nsupc - 1);
-       ops[GEMV] += 8 * nrow * nsupc;
-#ifdef _CRAY
-       CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
-                       &lusup[ufirst], &tempv[0] );
-        /* Scatter tempv[*] into lusup[*] */
-       iptr = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           c_sub(&lusup[iptr], &lusup[iptr], &tempv[i]);
-            ++iptr;
-           tempv[i] = comp_zero;
-       }
-    }
-    return 0;
diff --git a/superlu/csnode_dfs.c b/superlu/csnode_dfs.c
deleted file mode 100644
index c979aa21..00000000
--- a/superlu/csnode_dfs.c
+++ /dev/null
@@ -1,113 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_cdefs.h"
-csnode_dfs (
-          const int  jcol,         /* in - start of the supernode */
-          const int  kcol,         /* in - end of the supernode */
-          const int  *asub,        /* in */
-          const int  *xa_begin,    /* in */
-          const int  *xa_end,      /* in */
-          int        *xprune,      /* out */
-          int        *marker,      /* modified */
-          GlobalLU_t *Glu          /* modified */
-          )
-/* Purpose
- * =======
- *    csnode_dfs() - Determine the union of the row structures of those 
- *    columns within the relaxed snode.
- *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
- *    the portion outside the rectangular supernode must be zero.
- *
- * Return value
- * ============
- *     0   success;
- *    >0   number of bytes allocated when run out of memory.
- *
- */
-    register int i, k, ifrom, ito, nextl, new_next;
-    int          nsuper, krow, kmark, mem_error;
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    int          nzlmax;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-    nsuper = ++supno[jcol];    /* Next available supernode number */
-    nextl = xlsub[jcol];
-    for (i = jcol; i <= kcol; i++) {
-       /* For each nonzero in A[*,i] */
-       for (k = xa_begin[i]; k < xa_end[i]; k++) {     
-           krow = asub[k];
-           kmark = marker[krow];
-           if ( kmark != kcol ) { /* First time visit krow */
-               marker[krow] = kcol;
-               lsub[nextl++] = krow;
-               if ( nextl >= nzlmax ) {
-                   if ( mem_error = cLUMemXpand(jcol, nextl, LSUB, &nzlmax, 
Glu) )
-                       return (mem_error);
-                   lsub = Glu->lsub;
-               }
-           }
-       }
-       supno[i] = nsuper;
-    }
-    /* Supernode > 1, then make a copy of the subscripts for pruning */
-    if ( jcol < kcol ) {
-       new_next = nextl + (nextl - xlsub[jcol]);
-       while ( new_next > nzlmax ) {
-           if ( mem_error = cLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-               return (mem_error);
-           lsub = Glu->lsub;
-       }
-       ito = nextl;
-       for (ifrom = xlsub[jcol]; ifrom < nextl; )
-           lsub[ito++] = lsub[ifrom++];        
-        for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
-       nextl = ito;
-    }
-    xsup[nsuper+1] = kcol + 1;
-    supno[kcol+1]  = nsuper;
-    xprune[kcol]   = nextl;
-    xlsub[kcol+1]  = nextl;
-    return 0;
diff --git a/superlu/csp_blas2.c b/superlu/csp_blas2.c
deleted file mode 100644
index f24fa807..00000000
--- a/superlu/csp_blas2.c
+++ /dev/null
@@ -1,577 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:          csp_blas2.c
- * Purpose:            Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-#include "slu_cdefs.h"
-extern void ctrsv_();
-extern void cgemv_();
- * Function prototypes 
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, complex *x, SuperLUStat_t *stat, int *info)
- *   Purpose
- *   =======
- *
- *   sp_ctrsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.
- *                trans = 'C' or 'c'   A^H*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *          
- *   L       - (input) SuperMatrix*
- *            The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_C, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *             The factor U from the factorization Pr*A*Pc=L*U.
- *             U has types: Stype = NC, Dtype = SLU_C, Mtype = TRU.
- *    
- *   x       - (input/output) complex*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-    SCformat *Lstore;
-    NCformat *Ustore;
-    complex   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    complex temp;
-    complex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    complex comp_zero = {0.0, 0.0};
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    complex *work;
-    flops_t solve_ops;
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
-              !lsame_(trans, "C")) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sp_ctrsv", &i);
-       return 0;
-    }
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    if ( !(work = complexCalloc(L->nrow)) )
-       ABORT("Malloc fails for work in sp_ctrsv().");
-    if ( lsame_(trans, "N") ) {        /* Form x := inv(A)*x. */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L)*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               nrow = nsupr - nsupc;
-                /* 1 c_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
-               solve_ops += 8 * nrow * nsupc;
-               if ( nsupc == 1 ) {
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       cc_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
-                       c_sub(&x[irow], &x[irow], &comp_zero);
-                   }
-               } else {
-#ifdef _CRAY
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-                   ctrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   cgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-                   clsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-                   cmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; ++i, ++iptr) {
-                       irow = L_SUB(iptr);
-                       c_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
-                       work[i] = comp_zero;
-                   }
-               }
-           } /* for k ... */
-       } else {
-           /* Form x := inv(U)*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; k--) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-                /* 1 c_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-               if ( nsupc == 1 ) {
-                   c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-                   for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-                       irow = U_SUB(i);
-                       cc_mult(&comp_zero, &x[fsupc], &Uval[i]);
-                       c_sub(&x[irow], &x[irow], &comp_zero);
-                   }
-               } else {
-#ifdef _CRAY
-                   CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-                      &x[fsupc], &incx);
-                   ctrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                   cusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-                   for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                       solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                       for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-                               i++) {
-                           irow = U_SUB(i);
-                       cc_mult(&comp_zero, &x[jcol], &Uval[i]);
-                       c_sub(&x[irow], &x[irow], &comp_zero);
-                       }
-                    }
-               }
-           } /* for k ... */
-       }
-    } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L')*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               solve_ops += 8 * (nsupr - nsupc) * nsupc;
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                       cc_mult(&comp_zero, &x[irow], &Lval[i]);
-                       c_sub(&x[jcol], &x[jcol], &comp_zero);
-                       iptr++;
-                   }
-               }
-               if ( nsupc > 1 ) {
-                   solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   ctrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               }
-           }
-       } else {
-           /* Form x := inv(U')*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                       cc_mult(&comp_zero, &x[irow], &Uval[i]);
-                       c_sub(&x[jcol], &x[jcol], &comp_zero);
-                   }
-               }
-                /* 1 c_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-               if ( nsupc == 1 ) {
-                   c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                   ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-               }
-           } /* for k ... */
-       }
-    } else { /* Form x := conj(inv(A'))*x */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := conj(inv(L'))*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               solve_ops += 8 * (nsupr - nsupc) * nsupc;
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                        cc_conj(&temp, &Lval[i]);
-                       cc_mult(&comp_zero, &x[irow], &temp);
-                       c_sub(&x[jcol], &x[jcol], &comp_zero);
-                       iptr++;
-                   }
-               }
-               if ( nsupc > 1 ) {
-                   solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd(trans, strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                    ctrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-               }
-           }
-       } else {
-           /* Form x := conj(inv(U'))*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                        cc_conj(&temp, &Uval[i]);
-                       cc_mult(&comp_zero, &x[irow], &temp);
-                       c_sub(&x[jcol], &x[jcol], &comp_zero);
-                   }
-               }
-                /* 1 c_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-               if ( nsupc == 1 ) {
-                    cc_conj(&temp, &Lval[luptr]);
-                   c_div(&x[fsupc], &x[fsupc], &temp);
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd(trans, strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                    ctrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,
-                               &x[fsupc], &incx);
-               }
-           } /* for k ... */
-       }
-    }
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-sp_cgemv(char *trans, complex alpha, SuperMatrix *A, complex *x, 
-        int incx, complex beta, complex *y, int incy)
-/*  Purpose   
-    =======   
-    sp_cgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-    Parameters   
-    ==========   
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-    ALPHA  - (input) complex
-             On entry, ALPHA specifies the scalar alpha.   
-    A      - (input) SuperMatrix*
-             Before entry, the leading m by n part of the array A must   
-             contain the matrix of coefficients.   
-    X      - (input) complex*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-    BETA   - (input) complex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-    Y      - (output) complex*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-    ==== Sparse Level 2 Blas routine.   
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    int info;
-    complex temp, temp1;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-    complex comp_zero = {0.0, 0.0};
-    complex comp_one = {1.0, 0.0};
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)        info = 8;
-    if (info != 0) {
-       xerbla_("sp_cgemv ", &info);
-       return 0;
-    }
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || 
-       c_eq(&alpha, &comp_zero) && 
-       c_eq(&beta, &comp_one))
-       return 0;
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-       lenx = A->ncol;
-       leny = A->nrow;
-    } else {
-       lenx = A->nrow;
-       leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if ( !c_eq(&beta, &comp_one) ) {
-       if (incy == 1) {
-           if ( c_eq(&beta, &comp_zero) )
-               for (i = 0; i < leny; ++i) y[i] = comp_zero;
-           else
-               for (i = 0; i < leny; ++i) 
-                 cc_mult(&y[i], &beta, &y[i]);
-       } else {
-           iy = ky;
-           if ( c_eq(&beta, &comp_zero) )
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = comp_zero;
-                   iy += incy;
-               }
-           else
-               for (i = 0; i < leny; ++i) {
-                   cc_mult(&y[iy], &beta, &y[iy]);
-                   iy += incy;
-               }
-       }
-    }
-    if ( c_eq(&alpha, &comp_zero) ) return 0;
-    if ( notran ) {
-       /* Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (incy == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               if ( !c_eq(&x[jx], &comp_zero) ) {
-                   cc_mult(&temp, &alpha, &x[jx]);
-                   for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       cc_mult(&temp1, &temp,  &Aval[i]);
-                       c_add(&y[irow], &y[irow], &temp1);
-                   }
-               }
-               jx += incx;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    } else {
-       /* Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (incx == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               temp = comp_zero;
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   cc_mult(&temp1, &Aval[i], &x[irow]);
-                   c_add(&temp, &temp, &temp1);
-               }
-               cc_mult(&temp1, &alpha, &temp);
-               c_add(&y[jy], &y[jy], &temp1);
-               jy += incy;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    }
-    return 0;    
-} /* sp_cgemv */
diff --git a/superlu/csp_blas3.c b/superlu/csp_blas3.c
deleted file mode 100644
index 1f5e628c..00000000
--- a/superlu/csp_blas3.c
+++ /dev/null
@@ -1,141 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:          sp_blas3.c
- * Purpose:            Sparse BLAS3, using some dense BLAS3 operations.
- */
-#include "slu_cdefs.h"
-sp_cgemm(char *transa, char *transb, int m, int n, int k, 
-         complex alpha, SuperMatrix *A, complex *b, int ldb, 
-         complex beta, complex *c, int ldc)
-/*  Purpose   
-    =======   
-    sp_c performs one of the matrix-matrix operations   
-       C := alpha*op( A )*op( B ) + beta*C,   
-    where  op( X ) is one of 
-       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
-    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
-    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
-    Parameters   
-    ==========   
-    TRANSA - (input) char*
-             On entry, TRANSA specifies the form of op( A ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSA = 'N' or 'n',  op( A ) = A.   
-                TRANSA = 'T' or 't',  op( A ) = A'.   
-                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
-             Unchanged on exit.   
-    TRANSB - (input) char*
-             On entry, TRANSB specifies the form of op( B ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSB = 'N' or 'n',  op( B ) = B.   
-                TRANSB = 'T' or 't',  op( B ) = B'.   
-                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
-             Unchanged on exit.   
-    M      - (input) int   
-             On entry,  M  specifies  the number of rows of the matrix 
-            op( A ) and of the matrix C.  M must be at least zero. 
-            Unchanged on exit.   
-    N      - (input) int
-             On entry,  N specifies the number of columns of the matrix 
-            op( B ) and the number of columns of the matrix C. N must be 
-            at least zero.
-            Unchanged on exit.   
-    K      - (input) int
-             On entry, K specifies the number of columns of the matrix 
-            op( A ) and the number of rows of the matrix op( B ). K must 
-            be at least  zero.   
-             Unchanged on exit.
-    ALPHA  - (input) complex
-             On entry, ALPHA specifies the scalar alpha.   
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_C; Mtype = GE. 
-             In the future, more general A can be handled.
-    B      - COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb is 
-             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
-             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
-             part of the array B must contain the matrix B, otherwise 
-             the leading n by k part of the array B must contain the 
-             matrix B.   
-             Unchanged on exit.   
-    LDB    - (input) int
-             On entry, LDB specifies the first dimension of B as declared 
-             in the calling (sub) program. LDB must be at least max( 1, n ).  
-             Unchanged on exit.   
-    BETA   - (input) complex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then C need not be set on input.   
-    C      - COMPLEX PRECISION array of DIMENSION ( LDC, n ).   
-             Before entry, the leading m by n part of the array C must 
-             contain the matrix C,  except when beta is zero, in which 
-             case C need not be set on entry.   
-             On exit, the array C is overwritten by the m by n matrix 
-            ( alpha*op( A )*B + beta*C ).   
-    LDC    - (input) int
-             On entry, LDC specifies the first dimension of C as declared 
-             in the calling (sub)program. LDC must be at least max(1,m).   
-             Unchanged on exit.   
-    ==== Sparse Level 3 Blas routine.   
-    int    incx = 1, incy = 1;
-    int    j;
-    for (j = 0; j < n; ++j) {
-       sp_cgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
-    }
-    return 0;    
diff --git a/superlu/cutil.c b/superlu/cutil.c
deleted file mode 100644
index 91b45fa7..00000000
--- a/superlu/cutil.c
+++ /dev/null
@@ -1,482 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <math.h>
-#include "slu_cdefs.h"
-cCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      complex *nzval, int *rowind, int *colptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    NCformat *Astore;
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->rowind = rowind;
-    Astore->colptr = colptr;
-cCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      complex *nzval, int *colind, int *rowptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    NRformat *Astore;
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->colind = colind;
-    Astore->rowptr = rowptr;
-/* Copy matrix A into matrix B. */
-cCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
-    NCformat *Astore, *Bstore;
-    int      ncol, nnz, i;
-    B->Stype = A->Stype;
-    B->Dtype = A->Dtype;
-    B->Mtype = A->Mtype;
-    B->nrow  = A->nrow;;
-    B->ncol  = ncol = A->ncol;
-    Astore   = (NCformat *) A->Store;
-    Bstore   = (NCformat *) B->Store;
-    Bstore->nnz = nnz = Astore->nnz;
-    for (i = 0; i < nnz; ++i)
-       ((complex *)Bstore->nzval)[i] = ((complex *)Astore->nzval)[i];
-    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
-    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
-cCreate_Dense_Matrix(SuperMatrix *X, int m, int n, complex *x, int ldx,
-                   Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    DNformat    *Xstore;
-    X->Stype = stype;
-    X->Dtype = dtype;
-    X->Mtype = mtype;
-    X->nrow = m;
-    X->ncol = n;
-    X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
-    Xstore = (DNformat *) X->Store;
-    Xstore->lda = ldx;
-    Xstore->nzval = (complex *) x;
-cCopy_Dense_Matrix(int M, int N, complex *X, int ldx,
-                       complex *Y, int ldy)
- *
- *  Purpose
- *  =======
- *
- *  Copies a two-dimensional matrix X to another matrix Y.
- */
-    int    i, j;
-    for (j = 0; j < N; ++j)
-        for (i = 0; i < M; ++i)
-            Y[i + j*ldy] = X[i + j*ldx];
-cCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
-                       complex *nzval, int *nzval_colptr, int *rowind,
-                       int *rowind_colptr, int *col_to_sup, int *sup_to_col,
-                       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    SCformat *Lstore;
-    L->Stype = stype;
-    L->Dtype = dtype;
-    L->Mtype = mtype;
-    L->nrow = m;
-    L->ncol = n;
-    L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
-    if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
-    Lstore = L->Store;
-    Lstore->nnz = nnz;
-    Lstore->nsuper = col_to_sup[n];
-    Lstore->nzval = nzval;
-    Lstore->nzval_colptr = nzval_colptr;
-    Lstore->rowind = rowind;
-    Lstore->rowind_colptr = rowind_colptr;
-    Lstore->col_to_sup = col_to_sup;
-    Lstore->sup_to_col = sup_to_col;
- * Convert a row compressed storage into a column compressed storage.
- */
-cCompRow_to_CompCol(int m, int n, int nnz, 
-                   complex *a, int *colind, int *rowptr,
-                   complex **at, int **rowind, int **colptr)
-    register int i, j, col, relpos;
-    int *marker;
-    /* Allocate storage for another copy of the matrix. */
-    *at = (complex *) complexMalloc(nnz);
-    *rowind = (int *) intMalloc(nnz);
-    *colptr = (int *) intMalloc(n+1);
-    marker = (int *) intCalloc(n);
-    /* Get counts of each column of A, and set up column pointers */
-    for (i = 0; i < m; ++i)
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
-    (*colptr)[0] = 0;
-    for (j = 0; j < n; ++j) {
-       (*colptr)[j+1] = (*colptr)[j] + marker[j];
-       marker[j] = (*colptr)[j];
-    }
-    /* Transfer the matrix into the compressed column storage. */
-    for (i = 0; i < m; ++i) {
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
-           col = colind[j];
-           relpos = marker[col];
-           (*rowind)[relpos] = i;
-           (*at)[relpos] = a[j];
-           ++marker[col];
-       }
-    }
-    SUPERLU_FREE(marker);
-cPrint_CompCol_Matrix(char *what, SuperMatrix *A)
-    NCformat     *Astore;
-    register int i,n;
-    float       *dp;
-    printf("\nCompCol matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (NCformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
-    printf("nzval: ");
-    for (i = 0; i < 2*Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
-    printf("\ncolptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
-    printf("\n");
-    fflush(stdout);
-cPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
-    SCformat     *Astore;
-    register int i, j, k, c, d, n, nsup;
-    float       *dp;
-    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
-    printf("\nSuperNode matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (SCformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    col_to_sup = Astore->col_to_sup;
-    sup_to_col = Astore->sup_to_col;
-    rowind_colptr = Astore->rowind_colptr;
-    rowind = Astore->rowind;
-    printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 
-          A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
-    printf("nzval:\n");
-    for (k = 0; k <= Astore->nsuper; ++k) {
-      c = sup_to_col[k];
-      nsup = sup_to_col[k+1] - c;
-      for (j = c; j < c + nsup; ++j) {
-       d = Astore->nzval_colptr[j];
-       for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-         printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]);
-          d += 2;      
-       }
-      }
-    }
-#if 0
-    for (i = 0; i < 2*Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nnzval_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->rowind_colptr[n]; ++i) 
-        printf("%d  ", Astore->rowind[i]);
-    printf("\nrowind_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
-    printf("\ncol_to_sup: ");
-    for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
-    printf("\nsup_to_col: ");
-    for (i = 0; i <= Astore->nsuper+1; ++i) 
-        printf("%d  ", sup_to_col[i]);
-    printf("\n");
-    fflush(stdout);
-cPrint_Dense_Matrix(char *what, SuperMatrix *A)
-    DNformat     *Astore;
-    register int i, j, lda = Astore->lda;
-    float       *dp;
-    printf("\nDense matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    Astore = (DNformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
-    printf("\nnzval: ");
-    for (j = 0; j < A->ncol; ++j) {
-        for (i = 0; i < 2*A->nrow; ++i) printf("%f  ", dp[i + j*2*lda]);
-        printf("\n");
-    }
-    printf("\n");
-    fflush(stdout);
- * Diagnostic print of column "jcol" in the U/L factor.
- */
-cprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
-    int     i, k, fsupc;
-    int     *xsup, *supno;
-    int     *xlsub, *lsub;
-    complex  *lusup;
-    int     *xlusup;
-    complex  *ucol;
-    int     *usub, *xusub;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    printf("%s", msg);
-    printf("col %d: pivrow %d, supno %d, xprune %d\n", 
-          jcol, pivrow, supno[jcol], xprune[jcol]);
-    printf("\tU-col:\n");
-    for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
-       printf("\t%d%10.4f, %10.4f\n", usub[i], ucol[i].r, ucol[i].i);
-    printf("\tL-col in rectangular snode:\n");
-    fsupc = xsup[supno[jcol]]; /* first col of the snode */
-    i = xlsub[fsupc];
-    k = xlusup[jcol];
-    while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
-       printf("\t%d\t%10.4f, %10.4f\n", lsub[i], lusup[k].r, lusup[k].i);
-       i++; k++;
-    }
-    fflush(stdout);
- * Check whether tempv[] == 0. This should be true before and after 
- * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
- */
-void ccheck_tempv(int n, complex *tempv)
-    int i;
-    for (i = 0; i < n; i++) {
-       if ((tempv[i].r != 0.0) || (tempv[i].i != 0.0))
-       {
-           fprintf(stderr,"tempv[%d] = {%f, %f}\n", i, tempv[i].r, tempv[i].i);
-           ABORT("ccheck_tempv");
-       }
-    }
-cGenXtrue(int n, int nrhs, complex *x, int ldx)
-    int  i, j;
-    for (j = 0; j < nrhs; ++j)
-       for (i = 0; i < n; ++i) {
-           x[i + j*ldx].r = 1.0;
-           x[i + j*ldx].i = 0.0;
-       }
- * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
- */
-cFillRHS(trans_t trans, int nrhs, complex *x, int ldx,
-         SuperMatrix *A, SuperMatrix *B)
-    NCformat *Astore;
-    complex   *Aval;
-    DNformat *Bstore;
-    complex   *rhs;
-    complex one = {1.0, 0.0};
-    complex zero = {0.0, 0.0};
-    int      ldc;
-    char transc[1];
-    Astore = A->Store;
-    Aval   = (complex *) Astore->nzval;
-    Bstore = B->Store;
-    rhs    = Bstore->nzval;
-    ldc    = Bstore->lda;
-    if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
-    else *(unsigned char *)transc = 'T';
-    sp_cgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
-            x, ldx, zero, rhs, ldc);
- * Fills a complex precision array with a given value.
- */
-cfill(complex *a, int alen, complex dval)
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = dval;
- * Check the inf-norm of the error vector 
- */
-void cinf_norm_error(int nrhs, SuperMatrix *X, complex *xtrue)
-    DNformat *Xstore;
-    float err, xnorm;
-    complex *Xmat, *soln_work;
-    complex temp;
-    int i, j;
-    Xstore = X->Store;
-    Xmat = Xstore->nzval;
-    for (j = 0; j < nrhs; j++) {
-      soln_work = &Xmat[j*Xstore->lda];
-      err = xnorm = 0.0;
-      for (i = 0; i < X->nrow; i++) {
-        c_sub(&temp, &soln_work[i], &xtrue[i]);
-       err = SUPERLU_MAX(err, c_abs(&temp));
-       xnorm = SUPERLU_MAX(xnorm, c_abs(&soln_work[i]));
-      }
-      err = err / xnorm;
-      printf("||X - Xtrue||/||X|| = %e\n", err);
-    }
-/* Print performance of the code. */
-cPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
-           float rpg, float rcond, float *ferr,
-           float *berr, char *equed, SuperLUStat_t *stat)
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *utime;
-    flops_t  *ops;
-    utime = stat->utime;
-    ops   = stat->ops;
-    if ( utime[FACT] != 0. )
-       printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-              ops[FACT]*1e-6/utime[FACT]);
-    printf("Identify relaxed snodes    = %8.2f\n", utime[RELAX]);
-    if ( utime[SOLVE] != 0. )
-       printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
-              ops[SOLVE]*1e-6/utime[SOLVE]);
-    Lstore = (SCformat *) L->Store;
-    Ustore = (NCformat *) U->Store;
-    printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
-    printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
-    printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-          mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
-          mem_usage->expansions);
-    printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
-    printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
-          utime[FACT], ops[FACT]*1e-6/utime[FACT],
-          utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
-          utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
-    printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
-    printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
-          rpg, rcond, ferr[0], berr[0], equed);
-int print_complex_vec(char *what, int n, complex *vec)
-    int i;
-    printf("%s: n %d\n", what, n);
-    for (i = 0; i < n; ++i) printf("%d\t%f%f\n", i, vec[i].r, vec[i].i);
-    return 0;
diff --git a/superlu/dcolumn_bmod.c b/superlu/dcolumn_bmod.c
deleted file mode 100644
index 0f4e2b3c..00000000
--- a/superlu/dcolumn_bmod.c
+++ /dev/null
@@ -1,354 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-extern void dtrsv_();
-extern void dgemv_();
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-/* Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-dcolumn_bmod (
-            const int  jcol,     /* in */
-            const int  nseg,     /* in */
-            double     *dense,   /* in */
-            double     *tempv,   /* working array */
-            int        *segrep,  /* in */
-            int        *repfnz,  /* in */
-            int        fpanelc,  /* in -- first column in the current panel */
-            GlobalLU_t *Glu,     /* modified */
-            SuperLUStat_t *stat  /* output */
-            )
- * Purpose:
- * ========
- *    Performs numeric block updates (sup-col) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-    int         incx = 1, incy = 1;
-    double      alpha, beta;
-    /* krep = representative of current k-th supernode
-     * fsupc = first supernodal column
-     * nsupc = no of columns in supernode
-     * nsupr = no of rows in supernode (used as leading dimension)
-     * luptr = location of supernodal LU-block in storage
-     * kfnz = first nonz in the k-th supernodal segment
-     * no_zeros = no of leading zeros in a supernodal U-segment
-     */
-    double       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          fsupc, nsupc, nsupr, segsze;
-    int          nrow;   /* No of rows in the matrix of matrix-vector */
-    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
-    register int lptr, kfnz, isub, irow, i;
-    register int no_zeros, new_next; 
-    int          ufirst, nextlu;
-    int          fst_col; /* First column within small LU update */
-    int          d_fsupc; /* Distance between the first column of the current
-                            panel and the first column of the current snode. */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    double       *lusup;
-    int          *xlusup;
-    int          nzlumax;
-    double       *tempv1;
-    double      zero = 0.0;
-    double      one = 1.0;
-    double      none = -1.0;
-    int          mem_error;
-    flops_t      *ops = stat->ops;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nzlumax = Glu->nzlumax;
-    jcolp1 = jcol + 1;
-    jsupno = supno[jcol];
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k];
-       k--;
-       ksupno = supno[krep];
-       if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
-           fsupc = xsup[ksupno];
-           fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-           /* Distance from the current supernode to the current panel; 
-              d_fsupc=0 if fsupc > fpanelc. */
-           d_fsupc = fst_col - fsupc; 
-           luptr = xlusup[fst_col] + d_fsupc;
-           lptr = xlsub[fsupc] + d_fsupc;
-           kfnz = repfnz[krep];
-           kfnz = SUPERLU_MAX ( kfnz, fpanelc );
-           segsze = krep - kfnz + 1;
-           nsupc = krep - fst_col + 1;
-           nsupr = xlsub[fsupc+1] - xlsub[fsupc];      /* Leading dimension */
-           nrow = nsupr - d_fsupc - nsupc;
-           krep_ind = lptr + nsupc - 1;
-           ops[TRSV] += segsze * (segsze - 1);
-           ops[GEMV] += 2 * nrow * segsze;
-           /* 
-            * Case 1: Update U-segment of size 1 -- col-col update 
-            */
-           if ( segsze == 1 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc;
-               for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                   irow = lsub[i];
-                   dense[irow] -=  ukj*lusup[luptr];
-                   luptr++;
-               }
-           } else if ( segsze <= 3 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc-1;
-               ukj1 = dense[lsub[krep_ind - 1]];
-               luptr1 = luptr - nsupr;
-               if ( segsze == 2 ) { /* Case 2: 2cols-col update */
-                   ukj -= ukj1 * lusup[luptr1];
-                   dense[lsub[krep_ind]] = ukj;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       dense[irow] -= ( ukj*lusup[luptr]
-                                       + ukj1*lusup[luptr1] );
-                   }
-               } else { /* Case 3: 3cols-col update */
-                   ukj2 = dense[lsub[krep_ind - 2]];
-                   luptr2 = luptr1 - nsupr;
-                   ukj1 -= ukj2 * lusup[luptr2-1];
-                   ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                   dense[lsub[krep_ind]] = ukj;
-                   dense[lsub[krep_ind-1]] = ukj1;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       luptr2++;
-                       dense[irow] -= ( ukj*lusup[luptr]
-                            + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                   }
-               }
-           } else {
-               /*
-                * Case: sup-col update
-                * Perform a triangular solve and block update,
-                * then scatter the result of sup-col update to dense
-                */
-               no_zeros = kfnz - fst_col;
-               /* Copy U[*,j] segment from dense[*] to tempv[*] */
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   tempv[i] = dense[irow];
-                   ++isub; 
-               }
-               /* Dense triangular solve -- start effective triangle */
-               luptr += nsupr * no_zeros + no_zeros; 
-#ifdef _CRAY
-               STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-               if (nsupr < segsze) {
-                 fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d incx=%d\n", segsze, nsupr, incx);
-                 return -10000000;
-               }
-               dtrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-                alpha = one;
-                beta = zero;
-#ifdef _CRAY
-               SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-               dgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-               dlsolve ( nsupr, segsze, &lusup[luptr], tempv );
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-               dmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
-                /* Scatter tempv[] into SPA dense[] as a temporary storage */
-                isub = lptr + no_zeros;
-                for (i = 0; i < segsze; i++) {
-                    irow = lsub[isub];
-                    dense[irow] = tempv[i];
-                    tempv[i] = zero;
-                    ++isub;
-                }
-               /* Scatter tempv1[] into SPA dense[] */
-               for (i = 0; i < nrow; i++) {
-                   irow = lsub[isub];
-                   dense[irow] -= tempv1[i];
-                   tempv1[i] = zero;
-                   ++isub;
-               }
-           }
-       } /* if jsupno ... */
-    } /* for each segment... */
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    nextlu = xlusup[jcol];
-    fsupc = xsup[jsupno];
-    /* Copy the SPA dense into L\U[*,j] */
-    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
-    while ( new_next > nzlumax ) {
-       if (mem_error = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
-           return (mem_error);
-       lusup = Glu->lusup;
-       lsub = Glu->lsub;
-    }
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = zero;
-       ++nextlu;
-    }
-    xlusup[jcolp1] = nextlu;   /* Close L\U[*,jcol] */
-    /* For more updates within the panel (also within the current supernode), 
-     * should start from the first column of the panel, or the first column 
-     * of the supernode, whichever is bigger. There are 2 cases:
-     *    1) fsupc < fpanelc, then fst_col := fpanelc
-     *    2) fsupc >= fpanelc, then fst_col := fsupc
-     */
-    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-    if ( fst_col < jcol ) {
-       /* Distance between the current supernode and the current panel.
-          d_fsupc=0 if fsupc >= fpanelc. */
-       d_fsupc = fst_col - fsupc;
-       lptr = xlsub[fsupc] + d_fsupc;
-       luptr = xlusup[fst_col] + d_fsupc;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
-       nsupc = jcol - fst_col; /* Excluding jcol */
-       nrow = nsupr - d_fsupc - nsupc;
-       /* Points to the beginning of jcol in snode L\U(jsupno) */
-       ufirst = xlusup[jcol] + d_fsupc;        
-       ops[TRSV] += nsupc * (nsupc - 1);
-       ops[GEMV] += 2 * nrow * nsupc;
-#ifdef _CRAY
-       STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-       if (nsupr < nsupc) {
-         fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d incx=%d\n", nsupc, nsupr, incx);
-         return -10000000;
-       }
-       dtrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-       alpha = none; beta = one; /* y := beta*y + alpha*A*x */
-#ifdef _CRAY
-       SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       dgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       dlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       dmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
-               &lusup[ufirst], tempv );
-        /* Copy updates from tempv[*] into lusup[*] */
-       isub = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           lusup[isub] -= tempv[i];
-           tempv[i] = 0.0;
-           ++isub;
-       }
-    } /* if fst_col < jcol ... */ 
-    return 0;
diff --git a/superlu/dcolumn_dfs.c b/superlu/dcolumn_dfs.c
deleted file mode 100644
index f62ab50c..00000000
--- a/superlu/dcolumn_dfs.c
+++ /dev/null
@@ -1,267 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_ddefs.h"
-/* What type of supernodes we want */
-#define T2_SUPER
-          const int  m,         /* in - number of rows in the matrix */
-          const int  jcol,      /* in */
-          int        *perm_r,   /* in */
-          int        *nseg,     /* modified - with new segments appended */
-          int        *lsub_col, /* in - defines the RHS vector to start the dfs */
-          int        *segrep,   /* modified - with new segments appended */
-          int        *repfnz,   /* modified */
-          int        *xprune,   /* modified */
-          int        *marker,   /* modified */
-          int        *parent,   /* working array */
-          int        *xplore,   /* working array */
-          GlobalLU_t *Glu       /* modified */
-          )
- * Purpose
- * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
- *   decide the supernode boundary.
- *
- *   This routine does not use numeric values, but only use the RHS 
- *   row indices to start the dfs.
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives. The routine returns a list of such supernodal 
- *   representatives in topological order of the dfs that generates them.
- *   The location of the first nonzero in each such supernodal segment
- *   (supernodal entry location) is also returned.
- *
- * Local parameters
- * ================
- *   nseg: no of segments in current U[*,j]
- *   jsuper: jsuper=EMPTY if column j does not belong to the same
- *     supernode as j-1. Otherwise, jsuper=nsuper.
- *
- *   marker2: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- * Return value
- * ============
- *     0  success;
- *   > 0  number of bytes allocated when run out of space.
- *
- */
-    int     jcolp1, jcolm1, jsuper, nsuper, nextl;
-    int     k, krep, krow, kmark, kperm;
-    int     *marker2;           /* Used for small panel LU */
-    int            fsupc;              /* First column of a snode */
-    int     myfnz;             /* First nonz column of a U-segment */
-    int            chperm, chmark, chrep, kchild;
-    int     xdfs, maxdfs, kpar, oldrep;
-    int     jptr, jm1ptr;
-    int     ito, ifrom, istop; /* Used to compress row subscripts */
-    int     mem_error;
-    int     *xsup, *supno, *lsub, *xlsub;
-    int     nzlmax;
-    static  int  first = 1, maxsuper;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       first = 0;
-    }
-    jcolp1  = jcol + 1;
-    jcolm1  = jcol - 1;
-    nsuper  = supno[jcol];
-    jsuper  = nsuper;
-    nextl   = xlsub[jcol];
-    marker2 = &marker[2*m];
-    /* For each nonzero in A[*,jcol] do dfs */
-    for (k = 0; lsub_col[k] != EMPTY; k++) {
-       krow = lsub_col[k];
-       lsub_col[k] = EMPTY;
-       kmark = marker2[krow];          
-       /* krow was visited before, go to the next nonz */
-        if ( kmark == jcol ) continue; 
-       /* For each unmarked nbr krow of jcol
-        *      krow is in L: place it in structure of L[*,jcol]
-        */
-       marker2[krow] = jcol;
-       kperm = perm_r[krow];
-       if ( kperm == EMPTY ) {
-           lsub[nextl++] = krow;       /* krow is indexed into A */
-           if ( nextl >= nzlmax ) {
-               if ( mem_error = dLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                   return (mem_error);
-               lsub = Glu->lsub;
-           }
-            if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */
-       } else {
-           /*  krow is in U: if its supernode-rep krep
-            *  has been explored, update repfnz[*]
-            */
-           krep = xsup[supno[kperm]+1] - 1;
-           myfnz = repfnz[krep];
-           if ( myfnz != EMPTY ) {     /* Visited before */
-               if ( myfnz > kperm ) repfnz[krep] = kperm;
-               /* continue; */
-           }
-           else {
-               /* Otherwise, perform dfs starting at krep */
-               oldrep = EMPTY;
-               parent[krep] = oldrep;
-               repfnz[krep] = kperm;
-               xdfs = xlsub[krep];
-               maxdfs = xprune[krep];
-               do {
-                   /* 
-                    * For each unmarked kchild of krep 
-                    */
-                   while ( xdfs < maxdfs ) {
-                       kchild = lsub[xdfs];
-                       xdfs++;
-                       chmark = marker2[kchild];
-                       if ( chmark != jcol ) { /* Not reached yet */
-                           marker2[kchild] = jcol;
-                           chperm = perm_r[kchild];
-                           /* Case kchild is in L: place it in L[*,k] */
-                           if ( chperm == EMPTY ) {
-                               lsub[nextl++] = kchild;
-                               if ( nextl >= nzlmax ) {
-                                   if ( mem_error = dLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) )
-                                       return (mem_error);
-                                   lsub = Glu->lsub;
-                               }
-                               if ( chmark != jcolm1 ) jsuper = EMPTY;
-                           } else {
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               chrep = xsup[supno[chperm]+1] - 1;
-                               myfnz = repfnz[chrep];
-                               if ( myfnz != EMPTY ) { /* Visited before */
-                                   if ( myfnz > chperm )
-                                       repfnz[chrep] = chperm;
-                               } else {
-                                   /* Continue dfs at super-rep of kchild */
-                                   xplore[krep] = xdfs;        
-                                   oldrep = krep;
-                                   krep = chrep; /* Go deeper down G(L^t) */
-                                   parent[krep] = oldrep;
-                                   repfnz[krep] = chperm;
-                                   xdfs = xlsub[krep];     
-                                   maxdfs = xprune[krep];
-                               } /* else */
-                          } /* else */
-                       } /* if */
-                   } /* while */
-                   /* krow has no more unexplored nbrs;
-                    *    place supernode-rep krep in postorder DFS.
-                    *    backtrack dfs to its parent
-                    */
-                   segrep[*nseg] = krep;
-                   ++(*nseg);
-                   kpar = parent[krep]; /* Pop from stack, mimic recursion */
-                   if ( kpar == EMPTY ) break; /* dfs done */
-                   krep = kpar;
-                   xdfs = xplore[krep];
-                   maxdfs = xprune[krep];
-               } while ( kpar != EMPTY );      /* Until empty stack */
-           } /* else */
-       } /* else */
-    } /* for each nonzero ... */
-    /* Check to see if j belongs in the same supernode as j-1 */
-    if ( jcol == 0 ) { /* Do nothing for column 0 */
-       nsuper = supno[0] = 0;
-    } else {
-       fsupc = xsup[nsuper];
-       jptr = xlsub[jcol];     /* Not compressed yet */
-       jm1ptr = xlsub[jcolm1];
-#ifdef T2_SUPER
-       if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
-       /* Make sure the number of columns in a supernode doesn't
-          exceed threshold. */
-       if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
-       /* If jcol starts a new supernode, reclaim storage space in
-        * lsub from the previous supernode. Note we only store
-        * the subscript set of the first and last columns of
-        * a supernode. (first for num values, last for pruning)
-        */
-       if ( jsuper == EMPTY ) {        /* starts a new supernode */
-           if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
-               printf("  Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
-               ito = xlsub[fsupc+1];
-               xlsub[jcolm1] = ito;
-               istop = ito + jptr - jm1ptr;
-               xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
-               xlsub[jcol] = istop;
-               for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
-                   lsub[ito] = lsub[ifrom];
-               nextl = ito;            /* = istop + length(jcol) */
-           }
-           nsuper++;
-           supno[jcol] = nsuper;
-       } /* if a new supernode */
-    }  /* else: jcol > 0 */ 
-    /* Tidy up the pointers before exit */
-    xsup[nsuper+1] = jcolp1;
-    supno[jcolp1]  = nsuper;
-    xprune[jcol]   = nextl;    /* Initialize upper bound for pruning */
-    xlsub[jcolp1]  = nextl;
-    return 0;
diff --git a/superlu/dcomplex.c b/superlu/dcomplex.c
deleted file mode 100644
index b8e41c7f..00000000
--- a/superlu/dcomplex.c
+++ /dev/null
@@ -1,116 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * This file defines common arithmetic operations for complex type.
- */
-#include <math.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "slu_dcomplex.h"
-/* Complex Division c = a/b */
-void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b)
-    double ratio, den;
-    double abr, abi, cr, ci;
-    if( (abr = b->r) < 0.)
-       abr = - abr;
-    if( (abi = b->i) < 0.)
-       abi = - abi;
-    if( abr <= abi ) {
-       if (abi == 0) {
-           fprintf(stderr, "z_div.c: division by zero\n");
-            exit(-1);
-       }         
-       ratio = b->r / b->i ;
-       den = b->i * (1 + ratio*ratio);
-       cr = (a->r*ratio + a->i) / den;
-       ci = (a->i*ratio - a->r) / den;
-    } else {
-       ratio = b->i / b->r ;
-       den = b->r * (1 + ratio*ratio);
-       cr = (a->r + a->i*ratio) / den;
-       ci = (a->i - a->r*ratio) / den;
-    }
-    c->r = cr;
-    c->i = ci;
-/* Returns sqrt(z.r^2 + z.i^2) */
-double z_abs(doublecomplex *z)
-    double temp;
-    double real = z->r;
-    double imag = z->i;
-    if (real < 0) real = -real;
-    if (imag < 0) imag = -imag;
-    if (imag > real) {
-       temp = real;
-       real = imag;
-       imag = temp;
-    }
-    if ((real+imag) == real) return(real);
-    temp = imag/real;
-    temp = real*sqrt(1.0 + temp*temp);  /*overflow!!*/
-    return (temp);
-/* Approximates the abs */
-/* Returns abs(z.r) + abs(z.i) */
-double z_abs1(doublecomplex *z)
-    double real = z->r;
-    double imag = z->i;
-    if (real < 0) real = -real;
-    if (imag < 0) imag = -imag;
-    return (real + imag);
-/* Return the exponentiation */
-void z_exp(doublecomplex *r, doublecomplex *z)
-    double expx;
-    expx = exp(z->r);
-    r->r = expx * cos(z->i);
-    r->i = expx * sin(z->i);
-/* Return the complex conjugate */
-void d_cnjg(doublecomplex *r, doublecomplex *z)
-    r->r = z->r;
-    r->i = -z->i;
-/* Return the imaginary part */
-double d_imag(doublecomplex *z)
-    return (z->i);
diff --git a/superlu/dcopy_to_ucol.c b/superlu/dcopy_to_ucol.c
deleted file mode 100644
index 4d2bed09..00000000
--- a/superlu/dcopy_to_ucol.c
+++ /dev/null
@@ -1,112 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_ddefs.h"
-             int        jcol,    /* in */
-             int        nseg,    /* in */
-             int        *segrep,  /* in */
-             int        *repfnz,  /* in */
-             int        *perm_r,  /* in */
-             double     *dense,   /* modified - reset to zero on return */
-             GlobalLU_t *Glu      /* modified */
-             )
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int ksub, krep, ksupno;
-    int i, k, kfnz, segsze;
-    int fsupc, isub, irow;
-    int jsupno, nextu;
-    int new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    double    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-    double zero = 0.0;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k--];
-       ksupno = supno[krep];
-       if ( ksupno != jsupno ) { /* Should go into ucol[] */
-           kfnz = repfnz[krep];
-           if ( kfnz != EMPTY ) {      /* Nonzero U-segment */
-               fsupc = xsup[ksupno];
-               isub = xlsub[fsupc] + kfnz - fsupc;
-               segsze = krep - kfnz + 1;
-               new_next = nextu + segsze;
-               while ( new_next > nzumax ) {
-                   if (mem_error = dLUMemXpand(jcol, nextu, UCOL, &nzumax, 
-                       return (mem_error);
-                   ucol = Glu->ucol;
-                   if (mem_error = dLUMemXpand(jcol, nextu, USUB, &nzumax, 
-                       return (mem_error);
-                   usub = Glu->usub;
-                   lsub = Glu->lsub;
-               }
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   usub[nextu] = perm_r[irow];
-                   ucol[nextu] = dense[irow];
-                   dense[irow] = zero;
-                   nextu++;
-                   isub++;
-               } 
-           }
-       }
-    } /* for each segment... */
-    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
-    return 0;
diff --git a/superlu/dgscon.c b/superlu/dgscon.c
deleted file mode 100644
index 2e8e81bd..00000000
--- a/superlu/dgscon.c
+++ /dev/null
@@ -1,156 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:  dgscon.c
- * History:     Modified from lapack routines DGECON.
- */
-#include <math.h>
-#include "slu_ddefs.h"
-dgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
-       double anorm, double *rcond, SuperLUStat_t *stat, int *info)
-    Purpose   
-    =======   
-    DGSCON estimates the reciprocal of the condition number of a general 
-    real matrix A, in either the 1-norm or the infinity-norm, using   
-    the LU factorization computed by DGETRF.   
-    An estimate is obtained for norm(inv(A)), and the reciprocal of the   
-    condition number is computed as   
-       RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    NORM    (input) char*
-            Specifies whether the 1-norm condition number or the   
-            infinity-norm condition number is required:   
-            = '1' or 'O':  1-norm;   
-            = 'I':         Infinity-norm.
-    L       (input) SuperMatrix*
-            The factor L from the factorization Pr*A*Pc=L*U as computed by
-            dgstrf(). Use compressed row subscripts storage for supernodes,
-            i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
-    U       (input) SuperMatrix*
-            The factor U from the factorization Pr*A*Pc=L*U as computed by
-            dgstrf(). Use column-wise storage scheme, i.e., U has types:
-            Stype = SLU_NC, Dtype = SLU_D, Mtype = TRU.
-    ANORM   (input) double
-            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
-            If NORM = 'I', the infinity-norm of the original matrix A.
-    RCOND   (output) double*
-            The reciprocal of the condition number of the matrix A,   
-            computed as RCOND = 1/(norm(A) * norm(inv(A))).
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-    ===================================================================== 
-    /* Local variables */
-    int    kase, kase1, onenrm, i;
-    double ainvnm;
-    double *work;
-    int    *iwork;
-    extern int drscl_(int *, double *, double *, int *);
-    extern int dlacon_(int *, double *, double *, int *, double *, int *);
-    /* Test the input parameters. */
-    *info = 0;
-    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
-    if (! onenrm && ! lsame_(norm, "I")) *info = -1;
-    else if (L->nrow < 0 || L->nrow != L->ncol ||
-             L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU)
-        *info = -2;
-    else if (U->nrow < 0 || U->nrow != U->ncol ||
-             U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU) 
-       *info = -3;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("dgscon", &i);
-       return;
-    }
-    /* Quick return if possible */
-    *rcond = 0.;
-    if ( L->nrow == 0 || U->nrow == 0) {
-       *rcond = 1.;
-       return;
-    }
-    work = doubleCalloc( 3*L->nrow );
-    iwork = intMalloc( L->nrow );
-    if ( !work || !iwork )
-       ABORT("Malloc fails for work arrays in dgscon.");
-    /* Estimate the norm of inv(A). */
-    ainvnm = 0.;
-    if ( onenrm ) kase1 = 1;
-    else kase1 = 2;
-    kase = 0;
-    do {
-       dlacon_(&L->nrow, &work[L->nrow], &work[0], &iwork[0], &ainvnm, &kase);
-       if (kase == 0) break;
-       if (kase == kase1) {
-           /* Multiply by inv(L). */
-           sp_dtrsv("L", "No trans", "Unit", L, U, &work[0], stat, info);
-           /* Multiply by inv(U). */
-           sp_dtrsv("U", "No trans", "Non-unit", L, U, &work[0], stat, info);
-       } else {
-           /* Multiply by inv(U'). */
-           sp_dtrsv("U", "Transpose", "Non-unit", L, U, &work[0], stat, info);
-           /* Multiply by inv(L'). */
-           sp_dtrsv("L", "Transpose", "Unit", L, U, &work[0], stat, info);
-       }
-    } while ( kase != 0 );
-    /* Compute the estimate of the reciprocal condition number. */
-    if (ainvnm != 0.) *rcond = (1. / ainvnm) / anorm;
-    SUPERLU_FREE (work);
-    SUPERLU_FREE (iwork);
-    return;
-} /* dgscon */
diff --git a/superlu/dgsequ.c b/superlu/dgsequ.c
deleted file mode 100644
index dcd42a5b..00000000
--- a/superlu/dgsequ.c
+++ /dev/null
@@ -1,206 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  dgsequ.c
- * History:     Modified from LAPACK routine DGEEQU
- */
-#include <math.h>
-#include "slu_ddefs.h"
-dgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
-       double *colcnd, double *amax, int *info)
-    Purpose   
-    =======   
-    DGSEQU computes row and column scalings intended to equilibrate an   
-    M-by-N sparse matrix A and reduce its condition number. R returns the row
-    scale factors and C the column scale factors, chosen to try to make   
-    the largest element in each row and column of the matrix B with   
-    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
-    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
-    number and BIGNUM = largest safe number.  Use of these scaling   
-    factors is not guaranteed to reduce the condition number of A but   
-    works well in practice.   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    A       (input) SuperMatrix*
-            The matrix of dimension (A->nrow, A->ncol) whose equilibration
-            factors are to be computed. The type of A can be:
-            Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
-    R       (output) double*, size A->nrow
-            If INFO = 0 or INFO > M, R contains the row scale factors   
-            for A.
-    C       (output) double*, size A->ncol
-            If INFO = 0,  C contains the column scale factors for A.
-    ROWCND  (output) double*
-            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
-            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
-            AMAX is neither too large nor too small, it is not worth   
-            scaling by R.
-    COLCND  (output) double*
-            If INFO = 0, COLCND contains the ratio of the smallest   
-            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
-            worth scaling by C.
-    AMAX    (output) double*
-            Absolute value of largest matrix element.  If AMAX is very   
-            close to overflow or very close to underflow, the matrix   
-            should be scaled.
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-            > 0:  if INFO = i,  and i is   
-                  <= A->nrow:  the i-th row of A is exactly zero   
-                  >  A->ncol:  the (i-M)-th column of A is exactly zero   
-    ===================================================================== 
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    int i, j, irow;
-    double rcmin, rcmax;
-    double bignum, smlnum;
-    extern double dlamch_(char *);
-    /* Test the input parameters. */
-    *info = 0;
-    if ( A->nrow < 0 || A->ncol < 0 ||
-        A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE )
-       *info = -1;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("dgsequ", &i);
-       return;
-    }
-    /* Quick return if possible */
-    if ( A->nrow == 0 || A->ncol == 0 ) {
-       *rowcnd = 1.;
-       *colcnd = 1.;
-       *amax = 0.;
-       return;
-    }
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Get machine constants. */
-    smlnum = dlamch_("S");
-    bignum = 1. / smlnum;
-    /* Compute row scale factors. */
-    for (i = 0; i < A->nrow; ++i) r[i] = 0.;
-    /* Find the maximum element in each row. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           r[irow] = SUPERLU_MAX( r[irow], fabs(Aval[i]) );
-       }
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (i = 0; i < A->nrow; ++i) {
-       rcmax = SUPERLU_MAX(rcmax, r[i]);
-       rcmin = SUPERLU_MIN(rcmin, r[i]);
-    }
-    *amax = rcmax;
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (i = 0; i < A->nrow; ++i)
-           if (r[i] == 0.) {
-               *info = i + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (i = 0; i < A->nrow; ++i)
-           r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
-       /* Compute ROWCND = min(R(I)) / max(R(I)) */
-       *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-    /* Compute column scale factors */
-    for (j = 0; j < A->ncol; ++j) c[j] = 0.;
-    /* Find the maximum element in each column, assuming the row
-       scalings computed above. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           c[j] = SUPERLU_MAX( c[j], fabs(Aval[i]) * r[irow] );
-       }
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (j = 0; j < A->ncol; ++j) {
-       rcmax = SUPERLU_MAX(rcmax, c[j]);
-       rcmin = SUPERLU_MIN(rcmin, c[j]);
-    }
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (j = 0; j < A->ncol; ++j)
-           if ( c[j] == 0. ) {
-               *info = A->nrow + j + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (j = 0; j < A->ncol; ++j)
-           c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
-       /* Compute COLCND = min(C(J)) / max(C(J)) */
-       *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-    return;
-} /* dgsequ */
diff --git a/superlu/dgsrfs.c b/superlu/dgsrfs.c
deleted file mode 100644
index c92aecd2..00000000
--- a/superlu/dgsrfs.c
+++ /dev/null
@@ -1,447 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:  dgsrfs.c
- * History:     Modified from lapack routine DGERFS
- */
-#include <math.h>
-#include "slu_ddefs.h"
-dgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
-       int *perm_c, int *perm_r, char *equed, double *R, double *C,
-       SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr,
-       SuperLUStat_t *stat, int *info)
- *   Purpose   
- *   =======   
- *
- *   DGSRFS improves the computed solution to a system of linear   
- *   equations and provides error bounds and backward error estimates for 
- *   the solution.   
- *
- *   If equilibration was performed, the system becomes:
- *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- *   Arguments   
- *   =========   
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *   
- *   A       (input) SuperMatrix*
- *           The original matrix A in the system, or the scaled A if
- *           equilibration was done. The type of A can be:
- *           Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_GE.
- *    
- *   L       (input) SuperMatrix*
- *          The factor L from the factorization Pr*A*Pc=L*U. Use
- *           compressed row subscripts storage for supernodes, 
- *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = 
- * 
- *   U       (input) SuperMatrix*
- *           The factor U from the factorization Pr*A*Pc=L*U as computed by
- *           dgstrf(). Use column-wise storage scheme, 
- *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- *   perm_c  (input) int*, dimension (A->ncol)
- *          Column permutation vector, which defines the 
- *           permutation matrix Pc; perm_c[i] = j means column i of A is 
- *           in position j in A*Pc.
- *
- *   perm_r  (input) int*, dimension (A->nrow)
- *           Row permutation vector, which defines the permutation matrix Pr;
- *           perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- *   equed   (input) Specifies the form of equilibration that was done.
- *           = 'N': No equilibration.
- *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *           = 'C': Column equilibration, i.e., A was postmultiplied by
- *                  diag(C).
- *           = 'B': Both row and column equilibration, i.e., A was replaced 
- *                  by diag(R)*A*diag(C).
- *
- *   R       (input) double*, dimension (A->nrow)
- *           The row scale factors for A.
- *           If equed = 'R' or 'B', A is premultiplied by diag(R).
- *           If equed = 'N' or 'C', R is not accessed.
- * 
- *   C       (input) double*, dimension (A->ncol)
- *           The column scale factors for A.
- *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
- *           If equed = 'N' or 'R', C is not accessed.
- *
- *   B       (input) SuperMatrix*
- *           B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *           The right hand side matrix B.
- *           if equed = 'R' or 'B', B is premultiplied by diag(R).
- *
- *   X       (input/output) SuperMatrix*
- *           X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *           On entry, the solution matrix X, as computed by dgstrs().
- *           On exit, the improved solution matrix X.
- *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
- *               in order to obtain the solution to the original system.
- *
- *   FERR    (output) double*, dimension (B->ncol)   
- *           The estimated forward error bound for each solution vector   
- *           X(j) (the j-th column of the solution matrix X).   
- *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *           is an estimated upper bound for the magnitude of the largest 
- *           element in (X(j) - XTRUE) divided by the magnitude of the   
- *           largest element in X(j).  The estimate is as reliable as   
- *           the estimate for RCOND, and is almost always a slight   
- *           overestimate of the true error.
- *
- *   BERR    (output) double*, dimension (B->ncol)   
- *           The componentwise relative backward error of each solution   
- *           vector X(j) (i.e., the smallest relative change in   
- *           any element of A or B that makes X(j) an exact solution).
- *
- *   stat     (output) SuperLUStat_t*
- *            Record the statistics on runtime and floating-point operation 
- *            See util.h for the definition of 'SuperLUStat_t'.
- *
- *   info    (output) int*   
- *           = 0:  successful exit   
- *            < 0:  if INFO = -i, the i-th argument had an illegal value   
- *
- *    Internal Parameters   
- *    ===================   
- *
- *    ITMAX is the maximum number of steps of iterative refinement.   
- *
- */  
-#define ITMAX 5
-    /* Table of constant values */
-    int    ione = 1;
-    double ndone = -1.;
-    double done = 1.;
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    SuperMatrix Bjcol;
-    DNformat *Bstore, *Xstore, *Bjcol_store;
-    double   *Bmat, *Xmat, *Bptr, *Xptr;
-    int      kase;
-    double   safe1, safe2;
-    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
-    int      ldb, ldx, nrhs;
-    double   s, xk, lstres, eps, safmin;
-    char     transc[1];
-    trans_t  transt;
-    double   *work;
-    double   *rwork;
-    int      *iwork;
-    extern double dlamch_(char *);
-    extern int dlacon_(int *, double *, double *, int *, double *, int *);
-#ifdef _CRAY
-    extern int SCOPY(int *, double *, int *, double *, int *);
-    extern int SSAXPY(int *, double *, double *, int *, double *, int *);
-    extern int dcopy_(int *, double *, int *, double *, int *);
-    extern int daxpy_(int *, double *, double *, int *, double *, int *);
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    /* Test the input parameters */
-    *info = 0;
-    notran = (trans == NOTRANS);
-    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
-       *info = -3;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU )
-       *info = -4;
-    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
-        *info = -10;
-    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
-             X->Stype != SLU_DN || X->Dtype != SLU_D || X->Mtype != SLU_GE )
-       *info = -11;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("dgsrfs", &i);
-       return;
-    }
-    /* Quick return if possible */
-    if ( A->nrow == 0 || nrhs == 0) {
-       for (j = 0; j < nrhs; ++j) {
-           ferr[j] = 0.;
-           berr[j] = 0.;
-       }
-       return;
-    }
-    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-    colequ = lsame_(equed, "C") || lsame_(equed, "B");
-    /* Allocate working space */
-    work = doubleMalloc(2*A->nrow);
-    rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) );
-    iwork = intMalloc(2*A->nrow);
-    if ( !work || !rwork || !iwork ) 
-        ABORT("Malloc fails for work/rwork/iwork.");
-    if ( notran ) {
-       *(unsigned char *)transc = 'N';
-        transt = TRANS;
-    } else {
-       *(unsigned char *)transc = 'T';
-       transt = NOTRANS;
-    }
-    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
-    nz     = A->ncol + 1;
-    eps    = dlamch_("Epsilon");
-    safmin = dlamch_("Safe minimum");
-    safe1  = nz * safmin;
-    safe2  = safe1 / eps;
-    /* Compute the number of nonzeros in each row (or column) of A */
-    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
-    if ( notran ) {
-       for (k = 0; k < A->ncol; ++k)
-           for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
-               ++iwork[Astore->rowind[i]];
-    } else {
-       for (k = 0; k < A->ncol; ++k)
-           iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
-    }  
-    /* Copy one column of RHS B into Bjcol. */
-    Bjcol.Stype = B->Stype;
-    Bjcol.Dtype = B->Dtype;
-    Bjcol.Mtype = B->Mtype;
-    Bjcol.nrow  = B->nrow;
-    Bjcol.ncol  = 1;
-    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
-    Bjcol_store = Bjcol.Store;
-    Bjcol_store->lda = ldb;
-    Bjcol_store->nzval = work; /* address aliasing */
-    /* Do for each right hand side ... */
-    for (j = 0; j < nrhs; ++j) {
-       count = 0;
-       lstres = 3.;
-       Bptr = &Bmat[j*ldb];
-       Xptr = &Xmat[j*ldx];
-       while (1) { /* Loop until stopping criterion is satisfied. */
-           /* Compute residual R = B - op(A) * X,   
-              where op(A) = A, A**T, or A**H, depending on TRANS. */
-#ifdef _CRAY
-           SCOPY(&A->nrow, Bptr, &ione, work, &ione);
-           dcopy_(&A->nrow, Bptr, &ione, work, &ione);
-           sp_dgemv(transc, ndone, A, Xptr, ione, done, work, ione);
-           /* Compute componentwise relative backward error from formula 
-              max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
-              where abs(Z) is the componentwise absolute value of the matrix
-              or vector Z.  If the i-th component of the denominator is less
-              than SAFE2, then SAFE1 is added to the i-th component of the   
-              numerator and denominator before dividing. */
-           for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
-           /* Compute abs(op(A))*abs(X) + abs(B). */
-           if (notran) {
-               for (k = 0; k < A->ncol; ++k) {
-                   xk = fabs( Xptr[k] );
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                       rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
-               }
-           } else {
-               for (k = 0; k < A->ncol; ++k) {
-                   s = 0.;
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       s += fabs(Aval[i]) * fabs(Xptr[irow]);
-                   }
-                   rwork[k] += s;
-               }
-           }
-           s = 0.;
-           for (i = 0; i < A->nrow; ++i) {
-               if (rwork[i] > safe2)
-                   s = SUPERLU_MAX( s, fabs(work[i]) / rwork[i] );
-               else
-                   s = SUPERLU_MAX( s, (fabs(work[i]) + safe1) / 
-                               (rwork[i] + safe1) );
-           }
-           berr[j] = s;
-           /* Test stopping criterion. Continue iterating if   
-              1) The residual BERR(J) is larger than machine epsilon, and   
-              2) BERR(J) decreased by at least a factor of 2 during the   
-                 last iteration, and   
-              3) At most ITMAX iterations tried. */
-           if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
-               /* Update solution and try again. */
-               dgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-#ifdef _CRAY
-               SAXPY(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-               daxpy_(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-               lstres = berr[j];
-               ++count;
-           } else {
-               break;
-           }
-       } /* end while */
-       stat->RefineSteps = count;
-       /* Bound error from formula:
-          norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
-          ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
-          where   
-            norm(Z) is the magnitude of the largest component of Z   
-            inv(op(A)) is the inverse of op(A)   
-            abs(Z) is the componentwise absolute value of the matrix or
-              vector Z   
-            NZ is the maximum number of nonzeros in any row of A, plus 1   
-            EPS is machine epsilon   
-          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
-          is incremented by SAFE1 if the i-th component of   
-          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   
-          Use DLACON to estimate the infinity-norm of the matrix   
-             inv(op(A)) * diag(W),   
-          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
-       for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
-       /* Compute abs(op(A))*abs(X) + abs(B). */
-       if ( notran ) {
-           for (k = 0; k < A->ncol; ++k) {
-               xk = fabs( Xptr[k] );
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                   rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
-           }
-       } else {
-           for (k = 0; k < A->ncol; ++k) {
-               s = 0.;
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   xk = fabs( Xptr[irow] );
-                   s += fabs(Aval[i]) * xk;
-               }
-               rwork[k] += s;
-           }
-       }
-       for (i = 0; i < A->nrow; ++i)
-           if (rwork[i] > safe2)
-               rwork[i] = fabs(work[i]) + (iwork[i]+1)*eps*rwork[i];
-           else
-               rwork[i] = fabs(work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
-       kase = 0;
-       do {
-           dlacon_(&A->nrow, &work[A->nrow], work,
-                   &iwork[A->nrow], &ferr[j], &kase);
-           if (kase == 0) break;
-           if (kase == 1) {
-               /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->nrow; ++i) work[i] *= R[i];
-               dgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
-           } else {
-               /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
-               for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
-               dgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= R[i];  
-           }
-       } while ( kase != 0 );
-       /* Normalize error. */
-       lstres = 0.;
-       if ( notran && colequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, C[i] * fabs( Xptr[i]) );
-       } else if ( !notran && rowequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, R[i] * fabs( Xptr[i]) );
-       } else {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, fabs( Xptr[i]) );
-       }
-       if ( lstres != 0. )
-           ferr[j] /= lstres;
-    } /* for each RHS j ... */
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(rwork);
-    SUPERLU_FREE(iwork);
-    SUPERLU_FREE(Bjcol.Store);
-    return;
-} /* dgsrfs */
diff --git a/superlu/dgssv.c b/superlu/dgssv.c
deleted file mode 100644
index 25469f45..00000000
--- a/superlu/dgssv.c
+++ /dev/null
@@ -1,231 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_ddefs.h"
-dgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-      SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
-      SuperLUStat_t *stat, int *info )
- * Purpose
- * =======
- *
- * DGSSV solves the system of linear equations A*X=B, using the
- * LU factorization from DGSTRF. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *
- *      1.1. Permute the columns of A, forming A*Pc, where Pc
- *           is a permutation matrix. For more details of this step, 
- *           see sp_preorder.c.
- *
- *      1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
- *           by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      1.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the
- *      above algorithm to the transpose of A:
- *
- *      2.1. Permute columns of transpose(A) (rows of A),
- *           forming transpose(A)*Pc, where Pc is a permutation matrix. 
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
- *           determined by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      2.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- * 
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR; Dtype = SLU_D; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *         If A->Stype = SLU_NC, column permutation vector of size A->ncol
- *         which defines the permutation matrix Pc; perm_c[i] = j means 
- *         column i of A is in position j in A*Pc.
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- *         If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
- *            options->Fact = SamePattern_SameRowPerm, it is an input argument.
- *            On exit, perm_c may be overwritten by the product of the input
- *            perm_c and a permutation that postorders the elimination tree
- *            of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *            is already in postorder.
- *         Otherwise, it is an output argument.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined 
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->RowPerm = MY_PERMR or
- *            options->Fact = SamePattern_SameRowPerm, perm_r is an
- *            input argument.
- *         otherwise it is an output argument.
- *
- * L       (output) SuperMatrix*
- *         The factor L from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *         
- * U       (output) SuperMatrix*
- *        The factor U from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *         > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                so the solution could not be computed.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol.
- *   
- */
-    DNformat *Bstore;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int      lwork = 0, *etree, i;
-    /* Set default values for some parameters */
-    double   drop_tol = 0.;
-    int      panel_size;     /* panel size */
-    int      relax;          /* no of columns in a relaxed snodes */
-    int      permc_spec;
-    trans_t  trans = NOTRANS;
-    double   *utime;
-    double   t;        /* Temporary time */
-    /* Test the input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    if ( options->Fact != DOFACT ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-        (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-        A->Dtype != SLU_D || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-       B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
-       *info = -7;
-    if ( *info != 0 ) {
-       i = -(*info);
-       xerbla_("dgssv", &i);
-       return;
-    }
-    utime = stat->utime;
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       trans = TRANS;
-    } else {
-        if ( A->Stype == SLU_NC ) AA = A;
-    }
-    t = SuperLU_timer_();
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = NATURAL:  natural ordering 
-     *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-     *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-     *   permc_spec = COLAMD:   approximate minimum degree column ordering
-     *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-     */
-    permc_spec = options->ColPerm;
-    if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-      get_perm_c(permc_spec, AA, perm_c);
-    utime[COLPERM] = SuperLU_timer_() - t;
-    etree = intMalloc(A->ncol);
-    t = SuperLU_timer_();
-    sp_preorder(options, AA, perm_c, etree, &AC);
-    utime[ETREE] = SuperLU_timer_() - t;
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-    /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-         relax, panel_size, sp_ienv(3), sp_ienv(4));*/
-    t = SuperLU_timer_(); 
-    /* Compute the LU factorization of A. */
-    dgstrf(options, &AC, drop_tol, relax, panel_size,
-          etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
-    utime[FACT] = SuperLU_timer_() - t;
-    t = SuperLU_timer_();
-    if ( *info == 0 ) {
-        /* Solve the system A*X=B, overwriting B with X. */
-        dgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
-    }
-    utime[SOLVE] = SuperLU_timer_() - t;
-    SUPERLU_FREE (etree);
-    Destroy_CompCol_Permuted(&AC);
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-    }
diff --git a/superlu/dgssvx.c b/superlu/dgssvx.c
deleted file mode 100644
index bc7efc3b..00000000
--- a/superlu/dgssvx.c
+++ /dev/null
@@ -1,626 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_ddefs.h"
-dgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-       int *etree, char *equed, double *R, double *C,
-       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
-       SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, 
-       double *rcond, double *ferr, double *berr, 
-       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
- * Purpose
- * =======
- *
- * DGSSVX solves the system of linear equations A*X=B or A'*X=B, using
- * the LU factorization from dgstrf(). Error bounds on the solution and
- * a condition estimate are also provided. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *  
- *      1.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A is
- *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
- *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
- *           = TRANS or CONJ).
- *
- *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
- *           matrix that usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the matrix A (after equilibration if options->Equil = YES)
- *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
- *
- *      1.4. Compute the reciprocal pivot growth factor.
- *
- *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form of 
- *           A is used to estimate the condition number of the matrix A. If
- *           the reciprocal of the condition number is less than machine
- *           precision, info = A->ncol+1 is returned as a warning, but the
- *           routine still goes on to solve for X and computes error bounds
- *           as described below.
- *
- *      1.6. The system of equations is solved for X using the factored form
- *           of A.
- *
- *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      1.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R)
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
- *      to the transpose of A:
- *
- *      2.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A' is
- *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
- *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
- *
- *      2.2. Permute columns of transpose(A) (rows of A), 
- *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
- *           usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the transpose(A) (after equilibration if 
- *           options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
- *           permutation Pr determined by partial pivoting.
- *
- *      2.4. Compute the reciprocal pivot growth factor.
- *
- *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form 
- *           of transpose(A) is used to estimate the condition number of the
- *           matrix A. If the reciprocal of the condition number
- *           is less than machine precision, info = A->nrow+1 is returned as
- *           a warning, but the routine still goes on to solve for X and
- *           computes error bounds as described below.
- *
- *      2.6. The system of equations is solved for X using the factored form
- *           of transpose(A).
- *
- *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      2.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input/output) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- *         On entry, If options->Fact = FACTORED and equed is not 'N', 
- *         then A must have been equilibrated by the scaling factors in
- *         R and/or C.  
- *         On exit, A is not modified if options->Equil = NO, or if 
- *         options->Equil = YES but equed = 'N' on exit.
- *         Otherwise, if options->Equil = YES and equed is not 'N',
- *         A is scaled as follows:
- *         If A->Stype = SLU_NC:
- *           equed = 'R':  A := diag(R) * A
- *           equed = 'C':  A := A * diag(C)
- *           equed = 'B':  A := diag(R) * A * diag(C).
- *         If A->Stype = SLU_NR:
- *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
- *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
- *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
- *
- * perm_c  (input/output) int*
- *        If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
- *         which defines the permutation matrix Pc; perm_c[i] = j means
- *         column i of A is in position j in A*Pc.
- *         On exit, perm_c may be overwritten by the product of the input
- *         perm_c and a permutation that postorders the elimination tree
- *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *         is already in postorder.
- *
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *         will try to use the input perm_r, unless a certain threshold
- *         criterion is violated. In that case, perm_r is overwritten by a
- *         new permutation determined by partial pivoting or diagonal
- *         threshold pivoting.
- *         Otherwise, perm_r is output argument.
- * 
- * etree   (input/output) int*,  dimension (A->ncol)
- *         Elimination tree of Pc'*A'*A*Pc.
- *         If options->Fact != FACTORED and options->Fact != DOFACT,
- *         etree is an input argument, otherwise it is an output argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * equed   (input/output) char*
- *         Specifies the form of equilibration that was done.
- *         = 'N': No equilibration.
- *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
- *         = 'B': Both row and column equilibration, i.e., A was replaced 
- *                by diag(R)*A*diag(C).
- *         If options->Fact = FACTORED, equed is an input argument,
- *         otherwise it is an output argument.
- *
- * R       (input/output) double*, dimension (A->nrow)
- *         The row scale factors for A or transpose(A).
- *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
- *         If equed = 'N' or 'C', R is not accessed.
- *         If options->Fact = FACTORED, R is an input argument,
- *             otherwise, R is output.
- *         If options->zFact = FACTORED and equed = 'R' or 'B', each element
- *             of R must be positive.
- * 
- * C       (input/output) double*, dimension (A->ncol)
- *         The column scale factors for A or transpose(A).
- *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
- *         If equed = 'N' or 'R', C is not accessed.
- *         If options->Fact = FACTORED, C is an input argument,
- *             otherwise, C is output.
- *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
- *             of C must be positive.
- *         
- * L       (output) SuperMatrix*
- *        The factor L from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype SLU_= NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U       (output) SuperMatrix*
- *        The factor U from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * work    (workspace/output) void*, size (lwork) (in bytes)
- *         User supplied workspace, should be large enough
- *         to hold data structures for factors L and U.
- *         On exit, if fact is not 'F', L and U point to this array.
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               mem_usage->total_needed; no other side effects.
- *
- *         See argument 'mem_usage' for memory usage statistics.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         If B->ncol = 0, only LU decomposition is performed, the triangular
- *                         solve is skipped.
- *         On exit,
- *            if equed = 'N', B is not modified; otherwise
- *            if A->Stype = SLU_NC:
- *               if options->Trans = NOTRANS and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
- *                  B is overwritten by diag(C)*B;
- *            if A->Stype = SLU_NR:
- *               if options->Trans = NOTRANS and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
- *                  B is overwritten by diag(R)*B.
- *
- * X       (output) SuperMatrix*
- *         X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. 
- *         If info = 0 or info = A->ncol+1, X contains the solution matrix
- *         to the original system of equations. Note that A and B are modified
- *         on exit if equed is not 'N', and the solution to the equilibrated
- *         system is inv(diag(C))*X if options->Trans = NOTRANS and
- *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
- *         and equed = 'R' or 'B'.
- *
- * recip_pivot_growth (output) double*
- *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
- *         The infinity norm is used. If recip_pivot_growth is much less
- *         than 1, the stability of the LU factorization could be poor.
- *
- * rcond   (output) double*
- *         The estimate of the reciprocal condition number of the matrix A
- *         after equilibration (if done). If rcond is less than the machine
- *         precision (in particular, if rcond = 0), the matrix is singular
- *         to working precision. This condition is indicated by a return
- *         code of info > 0.
- *
- * FERR    (output) double*, dimension (B->ncol)   
- *         The estimated forward error bound for each solution vector   
- *         X(j) (the j-th column of the solution matrix X).   
- *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *         is an estimated upper bound for the magnitude of the largest 
- *         element in (X(j) - XTRUE) divided by the magnitude of the   
- *         largest element in X(j).  The estimate is as reliable as   
- *         the estimate for RCOND, and is almost always a slight   
- *         overestimate of the true error.
- *         If options->IterRefine = NOREFINE, ferr = 1.0.
- *
- * BERR    (output) double*, dimension (B->ncol)
- *         The componentwise relative backward error of each solution   
- *         vector X(j) (i.e., the smallest relative change in   
- *         any element of A or B that makes X(j) an exact solution).
- *         If options->IterRefine = NOREFINE, berr = 1.0.
- *
- * mem_usage (output) mem_usage_t*
- *         Record the memory usage statistics, consisting of following fields:
- *         - for_lu (float)
- *           The amount of space used in bytes for L\U data structures.
- *         - total_needed (float)
- *           The amount of space needed in bytes to perform factorization.
- *         - expansions (int)
- *           The number of memory expansions during the LU factorization.
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *         = 0: successful exit   
- *         < 0: if info = -i, the i-th argument had an illegal value   
- *         > 0: if info = i, and i is   
- *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
- *                    been completed, but the factor U is exactly   
- *                    singular, so the solution and error bounds   
- *                    could not be computed.   
- *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
- *                    precision, meaning that the matrix is singular to
- *                    working precision. Nevertheless, the solution and
- *                    error bounds are computed because there are a number
- *                    of situations where the computed solution can be more
- *                    accurate than the value of RCOND would suggest.   
- *              > A->ncol+1: number of bytes allocated when memory allocation
- *                    failure occurred, plus A->ncol.
- *
- */
-    DNformat  *Bstore, *Xstore;
-    double    *Bmat, *Xmat;
-    int       ldb, ldx, nrhs;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int       colequ, equil, nofact, notran, rowequ, permc_spec;
-    trans_t   trant;
-    char      norm[1];
-    int       i, j, info1;
-    double    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
-    int       relax, panel_size;
-    double    diag_pivot_thresh, drop_tol;
-    double    t0;      /* temporary time */
-    double    *utime;
-    /* External functions */
-    extern double dlangs(char *, SuperMatrix *);
-    extern double dlamch_(char *);
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    *info = 0;
-    nofact = (options->Fact != FACTORED);
-    equil = (options->Equil == YES);
-    notran = (options->Trans == NOTRANS);
-    if ( nofact ) {
-       *(unsigned char *)equed = 'N';
-       rowequ = FALSE;
-       colequ = FALSE;
-    } else {
-       rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-       colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       smlnum = dlamch_("Safe minimum");
-       bignum = 1. / smlnum;
-    }
-#if 0
-printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
-       options->Fact, options->Trans, *equed);
-    /* Test the input parameters */
-    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
-       options->Fact != SamePattern_SameRowPerm &&
-       !notran && options->Trans != TRANS && options->Trans != CONJ &&
-       !equil && options->Equil != NO)
-       *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-             A->Dtype != SLU_D || A->Mtype != SLU_GE )
-       *info = -2;
-    else if (options->Fact == FACTORED &&
-            !(rowequ || colequ || lsame_(equed, "N")))
-       *info = -6;
-    else {
-       if (rowequ) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, R[j]);
-               rcmax = SUPERLU_MAX(rcmax, R[j]);
-           }
-           if (rcmin <= 0.) *info = -7;
-           else if ( A->nrow > 0)
-               rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else rowcnd = 1.;
-       }
-       if (colequ && *info == 0) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, C[j]);
-               rcmax = SUPERLU_MAX(rcmax, C[j]);
-           }
-           if (rcmin <= 0.) *info = -8;
-           else if (A->nrow > 0)
-               colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else colcnd = 1.;
-       }
-       if (*info == 0) {
-           if ( lwork < -1 ) *info = -12;
-           else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     B->Stype != SLU_DN || B->Dtype != SLU_D || 
-                     B->Mtype != SLU_GE )
-               *info = -13;
-           else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     (B->ncol != 0 && B->ncol != X->ncol) ||
-                      X->Stype != SLU_DN ||
-                     X->Dtype != SLU_D || X->Mtype != SLU_GE )
-               *info = -14;
-       }
-    }
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("dgssvx", &i);
-       return;
-    }
-    /* Initialization for factor parameters */
-    panel_size = sp_ienv(1);
-    relax      = sp_ienv(2);
-    diag_pivot_thresh = options->DiagPivotThresh;
-    drop_tol   = 0.0;
-    utime = stat->utime;
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       if ( notran ) { /* Reverse the transpose argument. */
-           trant = TRANS;
-           notran = 0;
-       } else {
-           trant = NOTRANS;
-           notran = 1;
-       }
-    } else { /* A->Stype == SLU_NC */
-       trant = options->Trans;
-       AA = A;
-    }
-    if ( nofact && equil ) {
-       t0 = SuperLU_timer_();
-       /* Compute row and column scalings to equilibrate the matrix A. */
-       dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
-       if ( info1 == 0 ) {
-           /* Equilibrate matrix A. */
-           dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
-           rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-           colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       }
-       utime[EQUIL] = SuperLU_timer_() - t0;
-    }
-    if ( nrhs > 0 ) {
-        /* Scale the right hand side if equilibration was performed. */
-        if ( notran ) {
-           if ( rowequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                       Bmat[i + j*ldb] *= R[i];
-                   }
-           }
-        } else if ( colequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                   Bmat[i + j*ldb] *= C[i];
-               }
-        }
-    }
-    if ( nofact ) {
-        t0 = SuperLU_timer_();
-       /*
-        * Get column permutation vector perm_c[], according to permc_spec:
-        *   permc_spec = NATURAL:  natural ordering 
-        *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-        *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-        *   permc_spec = COLAMD:   approximate minimum degree column ordering
-        *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-        */
-       permc_spec = options->ColPerm;
-       if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-            get_perm_c(permc_spec, AA, perm_c);
-       utime[COLPERM] = SuperLU_timer_() - t0;
-       t0 = SuperLU_timer_();
-       sp_preorder(options, AA, perm_c, etree, &AC);
-       utime[ETREE] = SuperLU_timer_() - t0;
-/*     printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-              relax, panel_size, sp_ienv(3), sp_ienv(4));
-       fflush(stdout); */
-       /* Compute the LU factorization of A*Pc. */
-       t0 = SuperLU_timer_();
-       dgstrf(options, &AC, drop_tol, relax, panel_size,
-              etree, work, lwork, perm_c, perm_r, L, U, stat, info);
-       utime[FACT] = SuperLU_timer_() - t0;
-       if ( lwork == -1 ) {
-           mem_usage->total_needed = *info - A->ncol;
-           return;
-       }
-    }
-    if ( options->PivotGrowth ) {
-        if ( *info > 0 ) {
-           if ( *info <= A->ncol ) {
-               /* Compute the reciprocal pivot growth factor of the leading
-                  rank-deficient *info columns of A. */
-               *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U);
-           }
-           return;
-        }
-        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
-        *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U);
-    }
-    if ( options->ConditionNumber ) {
-      if (*info == 0) {
-        /* Estimate the reciprocal of the condition number of A. */
-        t0 = SuperLU_timer_();
-        if ( notran ) {
-           *(unsigned char *)norm = '1';
-        } else {
-           *(unsigned char *)norm = 'I';
-        }
-        anorm = dlangs(norm, AA);
-        dgscon(norm, L, U, anorm, rcond, stat, info);
-        utime[RCOND] = SuperLU_timer_() - t0;
-      } else *rcond = 0;
-    }
-    if ( *info == 0 && nrhs > 0 ) {
-        /* Compute the solution matrix X. */
-        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
-            for (i = 0; i < B->nrow; i++)
-               Xmat[i + j*ldx] = Bmat[i + j*ldb];
-        t0 = SuperLU_timer_();
-        dgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
-        utime[SOLVE] = SuperLU_timer_() - t0;
-        /* Use iterative refinement to improve the computed solution and 
-           error bounds and backward error estimates for it. */
-        t0 = SuperLU_timer_();
-        if ( options->IterRefine != NOREFINE ) {
-            dgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
-                   X, ferr, berr, stat, info);
-        } else {
-            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
-        }
-        utime[REFINE] = SuperLU_timer_() - t0;
-        /* Transform the solution matrix X to a solution of the original system. */
-        if ( notran ) {
-           if ( colequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        Xmat[i + j*ldx] *= C[i];
-                   }
-           }
-        } else if ( rowequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                   Xmat[i + j*ldx] *= R[i];
-                }
-        }
-    } /* end if nrhs > 0 */
-    if ( *info == 0 && options->ConditionNumber ) {
-        /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
-        if ( *rcond < dlamch_("E") ) *info = A->ncol + 1;
-    }
-    if ( *info != -10000000 && nofact ) {
-        dQuerySpace(L, U, mem_usage);
-        Destroy_CompCol_Permuted(&AC);
-    }
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-    }
diff --git a/superlu/dgstrf.c b/superlu/dgstrf.c
deleted file mode 100644
index 035e624a..00000000
--- a/superlu/dgstrf.c
+++ /dev/null
@@ -1,441 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_ddefs.h"
-dgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
-        int relax, int panel_size, int *etree, void *work, int lwork,
-        int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
-        SuperLUStat_t *stat, int *info)
- * Purpose
- * =======
- *
- * DGSTRF computes an LU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
- *
- * drop_tol (input) double (NOT IMPLEMENTED)
- *         Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
- * relax    (input) int
- *          To control degree of relaxing supernodes. If the number
- *          of nodes (columns) in a subtree of the elimination tree is less
- *          than relax, this subtree is considered as one supernode,
- *          regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *          A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *          Elimination tree of A'*A.
- *          Note: etree is a vector of parent pointers for a forest whose
- *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *          On input, the columns of A should be permuted so that the
- *          etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *          User-supplied work space and space for the output data structures.
- *          Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *         Column permutation vector, which defines the 
- *          permutation matrix Pc; perm_c[i] = j means column i of A is 
- *          in position j in A*Pc.
- *          When searching for diagonal, perm_c[*] is applied to the
- *          row subscripts of A, so that diagonal threshold pivoting
- *          can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *          Row permutation vector which defines the permutation matrix Pr,
- *          perm_r[i] = j means row i of A is in position j in Pr*A.
- *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *             will try to use the input perm_r, unless a certain threshold
- *             criterion is violated. In that case, perm_r is overwritten by
- *             a new permutation determined by partial pivoting or diagonal
- *             threshold pivoting.
- *          Otherwise, perm_r is output argument;
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = SLU_NC, 
- *          Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *          = 0: successful exit
- *          < 0: if info = -i, the i-th argument had an illegal value
- *          > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                and division by zero will occur if it is used to solve a
- *                system of equations.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol. If lwork = -1, it is
- *                the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays: 
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   xprune[0:n-1]: xprune[*] points to locations in subscript 
- *     vector lsub[*]. For column i, xprune[i] denotes the point where 
- *     structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
- *     to be traversed for symbolic factorization.
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been 
- *     reached when working on column j.
- *     Storage: relative to original row subscripts
- *     NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
- *           see dpanel_dfs.c; marker2 is used for inner-factorization,
- *            see dcolumn_dfs.c.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *      Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
- *     unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *     in topological order of the dfs. A supernode representative is the 
- *     last column of a supernode.
- *      The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
- *     supernodal representative r, repfnz[r] is the location of the first 
- *     nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *     indicates the supernode r has been explored.
- *     NOTE: There are W of them, each used for one column of a panel. 
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
- *      the panel diagonal. These are filled in during dpanel_dfs(), and are
- *      used later in the inner LU factorization within the panel.
- *     panel_lsub[]/dense[] pair forms the SPA data structure.
- *     NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *                NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *     The size of this array is defined by NUM_TEMPV() in dsp_defs.h.
- *
- */
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when 
-                                  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *iwork;
-    double    *dwork;
-    int              *segrep, *repfnz, *parent, *xplore;
-    int              *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
-    int              *xprune;
-    int              *marker;
-    double    *dense, *tempv;
-    int       *relax_end;
-    double    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    int       pivrow;   /* pivotal row number in the original matrix A */
-    int       nseg1;   /* no of segments in U-column above panel row jcol */
-    int       nseg;    /* no of segments in each U-column */
-    register int jcol; 
-    register int kcol; /* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;   /* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-    iinfo    = 0;
-    m        = A->nrow;
-    n        = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a        = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-    /* Allocate storage common to the factor routines */
-    *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz,
-                       panel_size, L, U, &Glu, &iwork, &dwork);
-    if ( *info ) return;
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-            &repfnz, &panel_lsub, &xprune, &marker);
-    dSetRWork(m, panel_size, dwork, &dense, &tempv);
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-       /* Compute the inverse of perm_r */
-       iperm_r = (int *) intMalloc(m);
-       for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-       iperm_r_allocated = 1;
-    }
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES ) {
-        heap_relax_snode(n, etree, relax, marker, relax_end); 
-    } else {
-        relax_snode(n, etree, relax, marker, relax_end); 
-    }
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-    /* 
-     * Work on one "panel" at a time. A panel is one of the following: 
-     *    (a) a relaxed supernode at the bottom of the etree, or
-     *    (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-      if (handle_getfem_callback() != 0) {
-       iinfo = *info = -333333333; goto HOUSTON_WE_HAVE_A_PROBLEM; 
-       break;
-      }
-       if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-           kcol = relax_end[jcol];       /* end of the relaxed snode */
-           panel_histo[kcol-jcol+1]++;
-           /* --------------------------------------
-            * Factorize the relaxed supernode(jcol:kcol) 
-            * -------------------------------------- */
-           /* Determine the union of the row structure of the snode */
-           if ( (*info = dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-                                   xprune, marker, &Glu)) != 0 )
-               return;
-            nextu    = xusub[jcol];
-           nextlu   = xlusup[jcol];
-           jsupno   = supno[jcol];
-           fsupc    = xsup[jsupno];
-           new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-           nzlumax = Glu.nzlumax;
-           while ( new_next > nzlumax ) {
-               if ( (*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) 
-                   return;
-           }
-           for (icol = jcol; icol<= kcol; icol++) {
-               xusub[icol+1] = nextu;
-               /* Scatter into SPA dense[*] */
-               for (k = xa_begin[icol]; k < xa_end[icol]; k++)
-                   dense[asub[k]] = a[k];
-               /* Numeric update within the snode */
-               dsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-               if ( (*info = dpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-#ifdef DEBUG
-               dprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
-           }
-           jcol = icol;
-       } else { /* Work on one panel of panel_size columns */
-           /* Adjust panel_size so that a panel won't overlap with the next 
-            * relaxed snode.
-            */
-           panel_size = w_def;
-           for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
-               if ( relax_end[k] != EMPTY ) {
-                   panel_size = k - jcol;
-                   break;
-               }
-           if ( k == min_mn ) panel_size = min_mn - jcol;
-           panel_histo[panel_size]++;
-           /* symbolic factor on a panel of columns */
-           dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                     dense, panel_lsub, segrep, repfnz, xprune,
-                     marker, parent, xplore, &Glu);
-           /* numeric sup-panel updates in topological order */
-           dpanel_bmod(m, panel_size, jcol, nseg1, dense,
-                       tempv, segrep, repfnz, &Glu, stat);
-           /* Sparse LU within the panel, and below panel diagonal */
-           for ( jj = jcol; jj < jcol + panel_size; jj++) {
-               k = (jj - jcol) * m; /* column index for w-wide arrays */
-               nseg = nseg1;   /* Begin after all the panel segments */
-               if ((*info = dcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
-                                       segrep, &repfnz[k], xprune, marker,
-                                       parent, xplore, &Glu)) != 0) 
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-               /* Numeric updates */
-               if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k],
-                                        tempv, &segrep[nseg1], &repfnz[k],
-                                        jcol, &Glu, stat)) != 0)
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-               /* Copy the U-segments to ucol[*] */
-               if ((*info = dcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-                                         perm_r, &dense[k], &Glu)) != 0)
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-               if ( (*info = dpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-               /* Prune columns (0:jj-1) using column jj */
-               dpruneL(jj, perm_r, pivrow, nseg, segrep,
-                        &repfnz[k], xprune, &Glu);
-               /* Reset repfnz[] for this column */
-               resetrep_col (nseg, segrep, &repfnz[k]);
-#ifdef DEBUG
-               dprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
-           }
-           jcol += panel_size; /* Move to the next panel */
-       } /* else */
-    } /* for */
-    *info = iinfo;
- HOUSTON_WE_HAVE_A_PROBLEM: /* try to avoid ugly leaks.. */
-    if ( m > n ) {
-       k = 0;
-        for (i = 0; i < m; ++i) 
-            if ( perm_r[i] == EMPTY ) {
-               perm_r[i] = n + k;
-               ++k;
-           }
-    }
-    if (*info == 0) {
-      countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
-      fixupL(min_mn, perm_r, &Glu);
-    }
-    dLUWorkFree(iwork, dwork, &Glu); /* Free work space and compress storage */
-    if ( fact == SamePattern_SameRowPerm ) {
-        /* L and U structures may have changed due to possibly different
-          pivoting, even though the storage is available.
-          There could also be memory expansions, so the array locations
-           may have changed, */
-        ((SCformat *)L->Store)->nnz = nnzL;
-       ((SCformat *)L->Store)->nsuper = Glu.supno[n];
-       ((SCformat *)L->Store)->nzval = Glu.lusup;
-       ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-       ((SCformat *)L->Store)->rowind = Glu.lsub;
-       ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-       ((NCformat *)U->Store)->nnz = nnzU;
-       ((NCformat *)U->Store)->nzval = Glu.ucol;
-       ((NCformat *)U->Store)->rowind = Glu.usub;
-       ((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-        dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, 
-                                Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-                                Glu.xsup, SLU_SC, SLU_D, SLU_TRLU);
-       dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
-                              Glu.usub, Glu.xusub, SLU_NC, SLU_D, SLU_TRU);
-    }
-    ops[FACT] += ops[TRSV] + ops[GEMV];        
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
diff --git a/superlu/dgstrs.c b/superlu/dgstrs.c
deleted file mode 100644
index 8735b879..00000000
--- a/superlu/dgstrs.c
+++ /dev/null
@@ -1,330 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_ddefs.h"
-extern void dtrsm_();
-extern void dgemm_();
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-dgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
- * Purpose
- * =======
- *
- * DGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by DGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *        Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-    int      incx = 1, incy = 1;
-    double   alpha = 1.0, beta = 1.0;
-    double   *work_col;
-    DNformat *Bstore;
-    double   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    double   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void dprint_soln();
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU )
-       *info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
-       *info = -6;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("dgstrs", &i);
-       return;
-    }
-    n = L->nrow;
-    work = doubleCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = doubleMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    if ( trans == NOTRANS ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-           solve_ops += nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 2 * nrow * nsupc * nrhs;
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               dtrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               dgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work_col[i]; /* Scatter */
-                       work_col[i] = 0.0;
-                       iptr++;
-                   }
-               }
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   dlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   dmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work[i];
-                       work[i] = 0.0;
-                       iptr++;
-                   }
-               }
-           } /* else ... */
-       } /* for L-solve */
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       dprint_soln(n, nrhs, Bmat);
-       /*
-        * Back solve Ux=y.
-        */
-       for (k = Lstore->nsuper; k >= 0; k--) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           luptr = L_NZ_START(fsupc);
-           solve_ops += nsupc * (nsupc + 1) * nrhs;
-           if ( nsupc == 1 ) {
-               rhs_work = &Bmat[0];
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work[fsupc] /= Lval[luptr];
-                   rhs_work += ldb;
-               }
-           } else {
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("U", strlen("U"));
-               ftcs3 = _cptofcd("N", strlen("N"));
-               STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               dtrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               for (j = 0; j < nrhs; j++)
-                   dusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-           }
-           for (j = 0; j < nrhs; ++j) {
-               rhs_work = &Bmat[j*ldb];
-               for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-                   solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-                       irow = U_SUB(i);
-                       rhs_work[irow] -= rhs_work[jcol] * Uval[i];
-                   }
-               }
-           }
-       } /* for U-solve */
-#ifdef DEBUG
-       printf("After U-solve: x=\n");
-       dprint_soln(n, nrhs, Bmat);
-       /* Compute the final solution X := Pc*X. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-        stat->ops[SOLVE] = solve_ops;
-    } else { /* Solve A'*X=B or CONJ(A)*X=B */
-       /* Permute right hand sides to form Pc'*B. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       stat->ops[SOLVE] = 0;
-       for (k = 0; k < nrhs; ++k) {
-           /* Multiply by inv(U'). */
-           sp_dtrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-           /* Multiply by inv(L'). */
-           sp_dtrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-       }
-       /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-    }
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
- * Diagnostic print of the solution vector 
- */
-dprint_soln(int n, int nrhs, double *soln)
-    int i;
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i]);
diff --git a/superlu/dgstrsL.c b/superlu/dgstrsL.c
deleted file mode 100644
index 3ac6dfec..00000000
--- a/superlu/dgstrsL.c
+++ /dev/null
@@ -1,230 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * September 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_ddefs.h"
-#include "slu_util.h"
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-dgstrsL(char *trans, SuperMatrix *L, int *perm_r, SuperMatrix *B, int *info)
- * Purpose
- * =======
- *
- * DGSTRSL only performs the L-solve using the LU factorization computed
- * by DGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) char*
- *          Specifies the form of the system of equations:
- *          = 'N':  A * X = B  (No transpose)
- *          = 'T':  A'* X = B  (Transpose)
- *          = 'C':  A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-    int      incx = 1, incy = 1;
-    double   alpha = 1.0, beta = 1.0;
-    DNformat *Bstore;
-    double   *Bmat;
-    SCformat *Lstore;
-    double   *Lval, *Uval;
-    int      nrow, notran;
-    int      fsupc, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    double   *work, *work_col, *rhs_work, *soln;
-    flops_t  solve_ops;
-    extern SuperLUStat_t SuperLUStat;
-    void dprint_soln();
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    notran = lsame_(trans, "N");
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C") ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
-       *info = -4;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("dgstrsL", &i);
-       return;
-    }
-    n = L->nrow;
-    work = doubleCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = doubleMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    solve_ops = 0;
-    if ( notran ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-           solve_ops += nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 2 * nrow * nsupc * nrhs;
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               dtrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               dgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work_col[i]; /* Scatter */
-                       work_col[i] = 0.0;
-                       iptr++;
-                   }
-               }
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   dlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   dmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work[i];
-                       work[i] = 0.0;
-                       iptr++;
-                   }
-               }
-           } /* else ... */
-       } /* for L-solve */
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       dprint_soln(n, nrhs, Bmat);
-        SuperLUStat.ops[SOLVE] = solve_ops;
-    } else { 
-      printf("Transposed solve not implemented.\n");
-      exit(0);
-    }
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
- * Diagnostic print of the solution vector 
- */
-dprint_soln(int n, int nrhs, double *soln)
-    int i;
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i]);
diff --git a/superlu/dlacon.c b/superlu/dlacon.c
deleted file mode 100644
index 26d21220..00000000
--- a/superlu/dlacon.c
+++ /dev/null
@@ -1,250 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <math.h>
-#include "slu_Cnames.h"
-dlacon_(int *n, double *v, double *x, int *isgn, double *est, int *kase)
-    Purpose   
-    =======   
-    DLACON estimates the 1-norm of a square matrix A.   
-    Reverse communication is used for evaluating matrix-vector products. 
-    Arguments   
-    =========   
-    N      (input) INT
-           The order of the matrix.  N >= 1.   
-    V      (workspace) DOUBLE PRECISION array, dimension (N)   
-           On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
-           (W is not returned).   
-    X      (input/output) DOUBLE PRECISION array, dimension (N)   
-           On an intermediate return, X should be overwritten by   
-                 A * X,   if KASE=1,   
-                 A' * X,  if KASE=2,
-           and DLACON must be re-called with all the other parameters   
-           unchanged.   
-    ISGN   (workspace) INT array, dimension (N)
-    EST    (output) DOUBLE PRECISION   
-           An estimate (a lower bound) for norm(A).   
-    KASE   (input/output) INT
-           On the initial call to DLACON, KASE should be 0.   
-           On an intermediate return, KASE will be 1 or 2, indicating   
-           whether X should be overwritten by A * X  or A' * X.   
-           On the final return from DLACON, KASE will again be 0.   
-    Further Details   
-    ======= =======   
-    Contributed by Nick Higham, University of Manchester.   
-    Originally named CONEST, dated March 16, 1988.   
-    Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
-    a real or complex matrix, with applications to condition estimation", 
-    ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
-    ===================================================================== 
-    /* Table of constant values */
-    int c__1 = 1;
-    double      zero = 0.0;
-    double      one = 1.0;
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static double altsgn, estold;
-    static int i, j;
-    double temp;
-#ifdef _CRAY
-    extern int ISAMAX(int *, double *, int *);
-    extern double SASUM(int *, double *, int *);
-    extern int SCOPY(int *, double *, int *, double *, int *);
-    extern int idamax_(int *, double *, int *);
-    extern double dasum_(int *, double *, int *);
-    extern int dcopy_(int *, double *, int *, double *, int *);
-#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a))    /* Copy sign */
-#define i_dnnt(a) \
-       ( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */
-    if ( *kase == 0 ) {
-       for (i = 0; i < *n; ++i) {
-           x[i] = 1. / (double) (*n);
-       }
-       *kase = 1;
-       jump = 1;
-       return 0;
-    }
-    switch (jump) {
-       case 1:  goto L20;
-       case 2:  goto L40;
-       case 3:  goto L70;
-       case 4:  goto L110;
-       case 5:  goto L140;
-    }
-    /*     ................ ENTRY   (JUMP = 1)   
-  L20:
-    if (*n == 1) {
-       v[0] = x[0];
-       *est = fabs(v[0]);
-       /*        ... QUIT */
-       goto L150;
-    }
-#ifdef _CRAY
-    *est = SASUM(n, x, &c__1);
-    *est = dasum_(n, x, &c__1);
-    for (i = 0; i < *n; ++i) {
-       x[i] = d_sign(one, x[i]);
-       isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 2)   
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-    j = idamax_(n, &x[0], &c__1);
-    --j;
-    iter = 2;
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 3)   
-#ifdef _CRAY
-    SCOPY(n, x, &c__1, v, &c__1);
-    dcopy_(n, x, &c__1, v, &c__1);
-    estold = *est;
-#ifdef _CRAY
-    *est = SASUM(n, v, &c__1);
-    *est = dasum_(n, v, &c__1);
-    for (i = 0; i < *n; ++i)
-       if (i_dnnt(d_sign(one, x[i])) != isgn[i])
-           goto L90;
-    goto L120;
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-    for (i = 0; i < *n; ++i) {
-       x[i] = d_sign(one, x[i]);
-       isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 4)   
-    jlast = j;
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-    j = idamax_(n, &x[0], &c__1);
-    --j;
-    if (x[jlast] != fabs(x[j]) && iter < 5) {
-       ++iter;
-       goto L50;
-    }
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-       x[i-1] = altsgn * ((double)(i - 1) / (double)(*n - 1) + 1.);
-       altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 5)   
-#ifdef _CRAY
-    temp = SASUM(n, x, &c__1) / (double)(*n * 3) * 2.;
-    temp = dasum_(n, x, &c__1) / (double)(*n * 3) * 2.;
-    if (temp > *est) {
-#ifdef _CRAY
-       SCOPY(n, &x[0], &c__1, &v[0], &c__1);
-       dcopy_(n, &x[0], &c__1, &v[0], &c__1);
-       *est = temp;
-    }
-    *kase = 0;
-    return 0;
-} /* dlacon_ */
diff --git a/superlu/dlamch.c b/superlu/dlamch.c
deleted file mode 100644
index e01db465..00000000
--- a/superlu/dlamch.c
+++ /dev/null
@@ -1,1004 +0,0 @@
-#include <stdio.h>
-#include "slu_Cnames.h"
-#define TRUE_ (1)
-#define FALSE_ (0)
-#define abs(x) ((x) >= 0 ? (x) : -(x))
-#define min(a,b) ((a) <= (b) ? (a) : (b))
-#define max(a,b) ((a) >= (b) ? (a) : (b))
-double dlamch_(char *cmach)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Copyright (c) 1992-2013 The University of Tennessee and The University
-                        of Tennessee Research Foundation.  All rights
-                        reserved.
-       Copyright (c) 2000-2013 The University of California Berkeley. All
-                        rights reserved.
-       Copyright (c) 2006-2013 The University of Colorado Denver.  All rights
-                        reserved.
-       Redistribution and use in source and binary forms, with or without
-       modification, are permitted provided that the following conditions are
-       met:
-       - Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       - Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer listed
-         in this license in the documentation and/or other materials
-         provided with the distribution.
-       - Neither the name of the copyright holders nor the names of its
-         contributors may be used to endorse or promote products derived from
-         this software without specific prior written permission.
-       The copyright holders provide no reassurances that the source code
-       provided does not infringe any patent, copyright, or any other
-       intellectual property rights of third parties.  The copyright holders
-       disclaim any liability to any recipient for claims brought against
-       recipient by any third party for infringement of that parties
-       intellectual property rights.
-    Purpose   
-    =======   
-    DLAMCH determines double precision machine parameters.   
-    Arguments   
-    =========   
-    CMACH   (input) CHARACTER*1   
-            Specifies the value to be returned by DLAMCH:   
-            = 'E' or 'e',   DLAMCH := eps   
-            = 'S' or 's ,   DLAMCH := sfmin   
-            = 'B' or 'b',   DLAMCH := base   
-            = 'P' or 'p',   DLAMCH := eps*base   
-            = 'N' or 'n',   DLAMCH := t   
-            = 'R' or 'r',   DLAMCH := rnd   
-            = 'M' or 'm',   DLAMCH := emin   
-            = 'U' or 'u',   DLAMCH := rmin   
-            = 'L' or 'l',   DLAMCH := emax   
-            = 'O' or 'o',   DLAMCH := rmax   
-            where   
-            eps   = relative machine precision   
-            sfmin = safe minimum, such that 1/sfmin does not overflow   
-            base  = base of the machine   
-            prec  = eps*base   
-            t     = number of (base) digits in the mantissa   
-            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise   
-            emin  = minimum exponent before (gradual) underflow   
-            rmin  = underflow threshold - base**(emin-1)   
-            emax  = largest exponent before overflow   
-            rmax  = overflow threshold  - (base**emax)*(1-eps)   
-   ===================================================================== 
-    static int first = TRUE_;
-    /* System generated locals */
-    int i__1;
-    double ret_val;
-    /* Builtin functions */
-    double pow_di(double *, int *);
-    /* Local variables */
-    static double base;
-    static int beta;
-    static double emin, prec, emax;
-    static int imin, imax;
-    static int lrnd;
-    static double rmin, rmax, t, rmach;
-    extern int lsame_(char *, char *);
-    static double small, sfmin;
-    extern /* Subroutine */ int dlamc2_(int *, int *, int *, 
-           double *, int *, double *, int *, double *);
-    static int it;
-    static double rnd, eps;
-    if (first) {
-       first = FALSE_;
-       dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
-       base = (double) beta;
-       t = (double) it;
-       if (lrnd) {
-           rnd = 1.;
-           i__1 = 1 - it;
-           eps = pow_di(&base, &i__1) / 2;
-       } else {
-           rnd = 0.;
-           i__1 = 1 - it;
-           eps = pow_di(&base, &i__1);
-       }
-       prec = eps * base;
-       emin = (double) imin;
-       emax = (double) imax;
-       sfmin = rmin;
-       small = 1. / rmax;
-       if (small >= sfmin) {
-       /* Use SMALL plus a bit, to avoid the possibility of rounding   
-             causing overflow when computing  1/sfmin. */
-           sfmin = small * (eps + 1.);
-       }
-    }
-    if (lsame_(cmach, "E")) {
-       rmach = eps;
-    } else if (lsame_(cmach, "S")) {
-       rmach = sfmin;
-    } else if (lsame_(cmach, "B")) {
-       rmach = base;
-    } else if (lsame_(cmach, "P")) {
-       rmach = prec;
-    } else if (lsame_(cmach, "N")) {
-       rmach = t;
-    } else if (lsame_(cmach, "R")) {
-       rmach = rnd;
-    } else if (lsame_(cmach, "M")) {
-       rmach = emin;
-    } else if (lsame_(cmach, "U")) {
-       rmach = rmin;
-    } else if (lsame_(cmach, "L")) {
-       rmach = emax;
-    } else if (lsame_(cmach, "O")) {
-       rmach = rmax;
-    }
-    ret_val = rmach;
-    return ret_val;
-/*     End of DLAMCH */
-} /* dlamch_ */
-/* Subroutine */ int dlamc1_(int *beta, int *t, int *rnd, int 
-       *ieee1)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    DLAMC1 determines the machine parameters given by BETA, T, RND, and   
-    IEEE1.   
-    Arguments   
-    =========   
-    BETA    (output) INT   
-            The base of the machine.   
-    T       (output) INT   
-            The number of ( BETA ) digits in the mantissa.   
-    RND     (output) INT   
-            Specifies whether proper rounding  ( RND = .TRUE. )  or   
-            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
-            be a reliable guide to the way in which the machine performs 
-            its arithmetic.   
-    IEEE1   (output) INT   
-            Specifies whether rounding appears to be done in the IEEE   
-            'round to nearest' style.   
-    Further Details   
-    ===============   
-    The routine is based on the routine  ENVRON  by Malcolm and   
-    incorporates suggestions by Gentleman and Marovich. See   
-       Malcolm M. A. (1972) Algorithms to reveal properties of   
-          floating-point arithmetic. Comms. of the ACM, 15, 949-951.   
-       Gentleman W. M. and Marovich S. B. (1974) More on algorithms   
-          that reveal properties of floating point arithmetic units.   
-          Comms. of the ACM, 17, 276-277.   
-   ===================================================================== 
-    /* Initialized data */
-    static int first = TRUE_;
-    /* System generated locals */
-    double d__1, d__2;
-    /* Local variables */
-    static int lrnd;
-    static double a, b, c, f;
-    static int lbeta;
-    static double savec;
-    extern double dlamc3_(double *, double *);
-    static int lieee1;
-    static double t1, t2;
-    static int lt;
-    static double one, qtr;
-    if (first) {
-       first = FALSE_;
-       one = 1.;
-/*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BE
-          IEEE1, T and RND.   
-          Throughout this routine  we use the function  DLAMC3  to ens
-          that relevant values are  stored and not held in registers, 
- or   
-          are not affected by optimizers.   
-          Compute  a = 2.0**m  with the  smallest positive integer m s
-          that   
-             fl( a + 1.0 ) = a. */
-       a = 1.;
-       c = 1.;
-/* +       WHILE( C.EQ.ONE )LOOP */
-       if (c == one) {
-           a *= 2;
-           c = dlamc3_(&a, &one);
-           d__1 = -a;
-           c = dlamc3_(&c, &d__1);
-           goto L10;
-       }
-/* +       END WHILE   
-          Now compute  b = 2.0**m  with the smallest positive integer 
-          such that   
-             fl( a + b ) .gt. a. */
-       b = 1.;
-       c = dlamc3_(&a, &b);
-/* +       WHILE( C.EQ.A )LOOP */
-       if (c == a) {
-           b *= 2;
-           c = dlamc3_(&a, &b);
-           goto L20;
-       }
-/* +       END WHILE   
-          Now compute the base.  a and c  are neighbouring floating po
-          numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and
- so   
-          their difference is beta. Adding 0.25 to c is to ensure that
- it   
-          is truncated to beta and not ( beta - 1 ). */
-       qtr = one / 4;
-       savec = c;
-       d__1 = -a;
-       c = dlamc3_(&c, &d__1);
-       lbeta = (int) (c + qtr);
-/*        Now determine whether rounding or chopping occurs,  by addin
-g a   
-          bit  less  than  beta/2  and a  bit  more  than  beta/2  to 
- a. */
-       b = (double) lbeta;
-       d__1 = b / 2;
-       d__2 = -b / 100;
-       f = dlamc3_(&d__1, &d__2);
-       c = dlamc3_(&f, &a);
-       if (c == a) {
-           lrnd = TRUE_;
-       } else {
-           lrnd = FALSE_;
-       }
-       d__1 = b / 2;
-       d__2 = b / 100;
-       f = dlamc3_(&d__1, &d__2);
-       c = dlamc3_(&f, &a);
-       if (lrnd && c == a) {
-           lrnd = FALSE_;
-       }
-/*        Try and decide whether rounding is done in the  IEEE  'round
- to   
-          nearest' style. B/2 is half a unit in the last place of the 
-          numbers A and SAVEC. Furthermore, A is even, i.e. has last  
-          zero, and SAVEC is odd. Thus adding B/2 to A should not  cha
-          A, but adding B/2 to SAVEC should change SAVEC. */
-       d__1 = b / 2;
-       t1 = dlamc3_(&d__1, &a);
-       d__1 = b / 2;
-       t2 = dlamc3_(&d__1, &savec);
-       lieee1 = t1 == a && t2 > savec && lrnd;
-/*        Now find  the  mantissa, t.  It should  be the  integer part
- of   
-          log to the base beta of a,  however it is safer to determine
-  t   
-          by powering.  So we find t as the smallest positive integer 
-          which   
-             fl( beta**t + 1.0 ) = 1.0. */
-       lt = 0;
-       a = 1.;
-       c = 1.;
-/* +       WHILE( C.EQ.ONE )LOOP */
-       if (c == one) {
-           ++lt;
-           a *= lbeta;
-           c = dlamc3_(&a, &one);
-           d__1 = -a;
-           c = dlamc3_(&c, &d__1);
-           goto L30;
-       }
-/* +       END WHILE */
-    }
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *ieee1 = lieee1;
-    return 0;
-/*     End of DLAMC1 */
-} /* dlamc1_ */
-/* Subroutine */ int dlamc2_(int *beta, int *t, int *rnd, 
-       double *eps, int *emin, double *rmin, int *emax, 
-       double *rmax)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    DLAMC2 determines the machine parameters specified in its argument   
-    list.   
-    Arguments   
-    =========   
-    BETA    (output) INT   
-            The base of the machine.   
-    T       (output) INT   
-            The number of ( BETA ) digits in the mantissa.   
-    RND     (output) INT   
-            Specifies whether proper rounding  ( RND = .TRUE. )  or   
-            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
-            be a reliable guide to the way in which the machine performs 
-            its arithmetic.   
-    EPS     (output) DOUBLE PRECISION   
-            The smallest positive number such that   
-               fl( 1.0 - EPS ) .LT. 1.0,   
-            where fl denotes the computed value.   
-    EMIN    (output) INT   
-            The minimum exponent before (gradual) underflow occurs.   
-    RMIN    (output) DOUBLE PRECISION   
-            The smallest normalized number for the machine, given by   
-            BASE**( EMIN - 1 ), where  BASE  is the floating point value 
-            of BETA.   
-    EMAX    (output) INT   
-            The maximum exponent before overflow occurs.   
-    RMAX    (output) DOUBLE PRECISION   
-            The largest positive number for the machine, given by   
-            BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point 
-            value of BETA.   
-    Further Details   
-    ===============   
-    The computation of  EPS  is based on a routine PARANOIA by   
-    W. Kahan of the University of California at Berkeley.   
-   ===================================================================== 
-    /* Table of constant values */
-    static int c__1 = 1;
-    /* Initialized data */
-    static int first = TRUE_;
-    static int iwarn = FALSE_;
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2, d__3, d__4, d__5;
-    /* Builtin functions */
-    double pow_di(double *, int *);
-    /* Local variables */
-    static int ieee;
-    static double half;
-    static int lrnd;
-    static double leps, zero, a, b, c;
-    static int i, lbeta;
-    static double rbase;
-    static int lemin, lemax, gnmin;
-    static double small;
-    static int gpmin;
-    static double third, lrmin, lrmax, sixth;
-    extern /* Subroutine */ int dlamc1_(int *, int *, int *, 
-           int *);
-    extern double dlamc3_(double *, double *);
-    static int lieee1;
-    extern /* Subroutine */ int dlamc4_(int *, double *, int *), 
-           dlamc5_(int *, int *, int *, int *, int *, 
-           double *);
-    static int lt, ngnmin, ngpmin;
-    static double one, two;
-    if (first) {
-       first = FALSE_;
-       zero = 0.;
-       one = 1.;
-       two = 2.;
-/*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values
- of   
-          BETA, T, RND, EPS, EMIN and RMIN.   
-          Throughout this routine  we use the function  DLAMC3  to ens
-          that relevant values are stored  and not held in registers, 
- or   
-          are not affected by optimizers.   
-          DLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1. 
-       dlamc1_(&lbeta, &lt, &lrnd, &lieee1);
-/*        Start to find EPS. */
-       b = (double) lbeta;
-       i__1 = -lt;
-       a = pow_di(&b, &i__1);
-       leps = a;
-/*        Try some tricks to see whether or not this is the correct  E
-PS. */
-       b = two / 3;
-       half = one / 2;
-       d__1 = -half;
-       sixth = dlamc3_(&b, &d__1);
-       third = dlamc3_(&sixth, &sixth);
-       d__1 = -half;
-       b = dlamc3_(&third, &d__1);
-       b = dlamc3_(&b, &sixth);
-       b = abs(b);
-       if (b < leps) {
-           b = leps;
-       }
-       leps = 1.;
-/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
-       if (leps > b && b > zero) {
-           leps = b;
-           d__1 = half * leps;
-/* Computing 5th power */
-           d__3 = two, d__4 = d__3, d__3 *= d__3;
-/* Computing 2nd power */
-           d__5 = leps;
-           d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5);
-           c = dlamc3_(&d__1, &d__2);
-           d__1 = -c;
-           c = dlamc3_(&half, &d__1);
-           b = dlamc3_(&half, &c);
-           d__1 = -b;
-           c = dlamc3_(&half, &d__1);
-           b = dlamc3_(&half, &c);
-           goto L10;
-       }
-/* +       END WHILE */
-       if (a < leps) {
-           leps = a;
-       }
-/*        Computation of EPS complete.   
-          Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3
-          Keep dividing  A by BETA until (gradual) underflow occurs. T
-          is detected when we cannot recover the previous A. */
-       rbase = one / lbeta;
-       small = one;
-       for (i = 1; i <= 3; ++i) {
-           d__1 = small * rbase;
-           small = dlamc3_(&d__1, &zero);
-/* L20: */
-       }
-       a = dlamc3_(&one, &small);
-       dlamc4_(&ngpmin, &one, &lbeta);
-       d__1 = -one;
-       dlamc4_(&ngnmin, &d__1, &lbeta);
-       dlamc4_(&gpmin, &a, &lbeta);
-       d__1 = -a;
-       dlamc4_(&gnmin, &d__1, &lbeta);
-       ieee = FALSE_;
-       if (ngpmin == ngnmin && gpmin == gnmin) {
-           if (ngpmin == gpmin) {
-               lemin = ngpmin;
-/*            ( Non twos-complement machines, no gradual under
-                e.g.,  VAX ) */
-           } else if (gpmin - ngpmin == 3) {
-               lemin = ngpmin - 1 + lt;
-               ieee = TRUE_;
-/*            ( Non twos-complement machines, with gradual und
-                e.g., IEEE standard followers ) */
-           } else {
-               lemin = min(ngpmin,gpmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-       } else if (ngpmin == gpmin && ngnmin == gnmin) {
-           if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
-               lemin = max(ngpmin,ngnmin);
-/*            ( Twos-complement machines, no gradual underflow
-                e.g., CYBER 205 ) */
-           } else {
-               lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-       } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
-                {
-           if (gpmin - min(ngpmin,ngnmin) == 3) {
-               lemin = max(ngpmin,ngnmin) - 1 + lt;
-/*            ( Twos-complement machines with gradual underflo
-                no known machine ) */
-           } else {
-               lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-       } else {
-/* Computing MIN */
-           i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
-           lemin = min(i__1,gnmin);
-/*         ( A guess; no known machine ) */
-           iwarn = TRUE_;
-       }
-/* **   
-   Comment out this if block if EMIN is ok */
-       if (iwarn) {
-           first = TRUE_;
-           printf("\n\n WARNING. The value EMIN may be incorrect:- ");
-           printf("EMIN = %8i\n",lemin);
-           printf("If, after inspection, the value EMIN looks acceptable");
-            printf("please comment out \n the IF block as marked within the"); 
-            printf("code of routine DLAMC2, \n otherwise supply EMIN"); 
-            printf("explicitly.\n");
-       }
-/* **   
-          Assume IEEE arithmetic if we found denormalised  numbers abo
-          or if arithmetic seems to round in the  IEEE style,  determi
-          in routine DLAMC1. A true IEEE machine should have both  thi
-          true; however, faulty machines may have one or the other. */
-       ieee = ieee || lieee1;
-/*        Compute  RMIN by successive division by  BETA. We could comp
-          RMIN as BASE**( EMIN - 1 ),  but some machines underflow dur
-          this computation. */
-       lrmin = 1.;
-       i__1 = 1 - lemin;
-       for (i = 1; i <= 1-lemin; ++i) {
-           d__1 = lrmin * rbase;
-           lrmin = dlamc3_(&d__1, &zero);
-/* L30: */
-       }
-/*        Finally, call DLAMC5 to compute EMAX and RMAX. */
-       dlamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
-    }
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *eps = leps;
-    *emin = lemin;
-    *rmin = lrmin;
-    *emax = lemax;
-    *rmax = lrmax;
-    return 0;
-/*     End of DLAMC2 */
-} /* dlamc2_ */
-double dlamc3_(double *a, double *b)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    DLAMC3  is intended to force  A  and  B  to be stored prior to doing 
-    the addition of  A  and  B ,  for use in situations where optimizers 
-    might hold one of these in a register.   
-    Arguments   
-    =========   
-    A, B    (input) DOUBLE PRECISION   
-            The values A and B.   
-   ===================================================================== 
-/* >>Start of File<<   
-       System generated locals */
-  volatile double ret_val; /* [added volatile to avoid -O3 optimizations.. 
(julien pommier)] */
-    ret_val = *a + *b;
-    return ret_val;
-/*     End of DLAMC3 */
-} /* dlamc3_ */
-/* Subroutine */ int dlamc4_(int *emin, double *start, int *base)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    DLAMC4 is a service routine for DLAMC2.   
-    Arguments   
-    =========   
-    EMIN    (output) EMIN   
-            The minimum exponent before (gradual) underflow, computed by 
-            setting A = START and dividing by BASE until the previous A   
-            can not be recovered.   
-    START   (input) DOUBLE PRECISION   
-            The starting point for determining EMIN.   
-    BASE    (input) INT   
-            The base of the machine.   
-   ===================================================================== 
-    /* System generated locals */
-    int i__1;
-    double d__1;
-    /* Local variables */
-    static double zero, a;
-    static int i;
-    static double rbase, b1, b2, c1, c2, d1, d2;
-    extern double dlamc3_(double *, double *);
-    static double one;
-    a = *start;
-    one = 1.;
-    rbase = one / *base;
-    zero = 0.;
-    *emin = 1;
-    d__1 = a * rbase;
-    b1 = dlamc3_(&d__1, &zero);
-    c1 = a;
-    c2 = a;
-    d1 = a;
-    d2 = a;
-/* +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.   
-      $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP */
-    if (c1 == a && c2 == a && d1 == a && d2 == a) {
-       --(*emin);
-       a = b1;
-       d__1 = a / *base;
-       b1 = dlamc3_(&d__1, &zero);
-       d__1 = b1 * *base;
-       c1 = dlamc3_(&d__1, &zero);
-       d1 = zero;
-       i__1 = *base;
-       for (i = 1; i <= *base; ++i) {
-           d1 += b1;
-/* L20: */
-       }
-       d__1 = a * rbase;
-       b2 = dlamc3_(&d__1, &zero);
-       d__1 = b2 / rbase;
-       c2 = dlamc3_(&d__1, &zero);
-       d2 = zero;
-       i__1 = *base;
-       for (i = 1; i <= *base; ++i) {
-           d2 += b2;
-/* L30: */
-       }
-       goto L10;
-    }
-/* +    END WHILE */
-    return 0;
-/*     End of DLAMC4 */
-} /* dlamc4_ */
-/* Subroutine */ int dlamc5_(int *beta, int *p, int *emin, 
-       int *ieee, int *emax, double *rmax)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    DLAMC5 attempts to compute RMAX, the largest machine floating-point   
-    number, without overflow.  It assumes that EMAX + abs(EMIN) sum   
-    approximately to a power of 2.  It will fail on machines where this   
-    assumption does not hold, for example, the Cyber 205 (EMIN = -28625, 
-    EMAX = 28718).  It will also fail if the value supplied for EMIN is   
-    too large (i.e. too close to zero), probably with overflow.   
-    Arguments   
-    =========   
-    BETA    (input) INT   
-            The base of floating-point arithmetic.   
-    P       (input) INT   
-            The number of base BETA digits in the mantissa of a   
-            floating-point value.   
-    EMIN    (input) INT   
-            The minimum exponent before (gradual) underflow.   
-    IEEE    (input) INT   
-            A int flag specifying whether or not the arithmetic   
-            system is thought to comply with the IEEE standard.   
-    EMAX    (output) INT   
-            The largest exponent before overflow   
-    RMAX    (output) DOUBLE PRECISION   
-            The largest machine floating-point number.   
-   ===================================================================== 
-       First compute LEXP and UEXP, two powers of 2 that bound   
-       abs(EMIN). We then assume that EMAX + abs(EMIN) will sum   
-       approximately to the bound that is closest to abs(EMIN).   
-       (EMAX is the exponent of the required number RMAX). */
-    /* Table of constant values */
-    static double c_b5 = 0.;
-    /* System generated locals */
-    int i__1;
-    double d__1;
-    /* Local variables */
-    static int lexp;
-    static double oldy;
-    static int uexp, i;
-    static double y, z;
-    static int nbits;
-    extern double dlamc3_(double *, double *);
-    static double recbas;
-    static int exbits, expsum, try__;
-    lexp = 1;
-    exbits = 1;
-    try__ = lexp << 1;
-    if (try__ <= -(*emin)) {
-       lexp = try__;
-       ++exbits;
-       goto L10;
-    }
-    if (lexp == -(*emin)) {
-       uexp = lexp;
-    } else {
-       uexp = try__;
-       ++exbits;
-    }
-/*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater   
-       than or equal to EMIN. EXBITS is the number of bits needed to   
-       store the exponent. */
-    if (uexp + *emin > -lexp - *emin) {
-       expsum = lexp << 1;
-    } else {
-       expsum = uexp << 1;
-    }
-/*     EXPSUM is the exponent range, approximately equal to   
-       EMAX - EMIN + 1 . */
-    *emax = expsum + *emin - 1;
-    nbits = exbits + 1 + *p;
-/*     NBITS is the total number of bits needed to store a   
-       floating-point number. */
-    if (nbits % 2 == 1 && *beta == 2) {
-/*        Either there are an odd number of bits used to store a   
-          floating-point number, which is unlikely, or some bits are 
-          not used in the representation of numbers, which is possible
-          (e.g. Cray machines) or the mantissa has an implicit bit,   
-          (e.g. IEEE machines, Dec Vax machines), which is perhaps the
-          most likely. We have to assume the last alternative.   
-          If this is true, then we need to reduce EMAX by one because 
-          there must be some way of representing zero in an implicit-b
-          system. On machines like Cray, we are reducing EMAX by one 
-          unnecessarily. */
-       --(*emax);
-    }
-    if (*ieee) {
-/*        Assume we are on an IEEE machine which reserves one exponent
-          for infinity and NaN. */
-       --(*emax);
-    }
-/*     Now create RMAX, the largest machine number, which should   
-       be equal to (1.0 - BETA**(-P)) * BETA**EMAX .   
-       First compute 1.0 - BETA**(-P), being careful that the   
-       result is less than 1.0 . */
-    recbas = 1. / *beta;
-    z = *beta - 1.;
-    y = 0.;
-    i__1 = *p;
-    for (i = 1; i <= *p; ++i) {
-       z *= recbas;
-       if (y < 1.) {
-           oldy = y;
-       }
-       y = dlamc3_(&y, &z);
-/* L20: */
-    }
-    if (y >= 1.) {
-       y = oldy;
-    }
-/*     Now multiply by BETA**EMAX to get RMAX. */
-    i__1 = *emax;
-    for (i = 1; i <= *emax; ++i) {
-       d__1 = y * *beta;
-       y = dlamc3_(&d__1, &c_b5);
-/* L30: */
-    }
-    *rmax = y;
-    return 0;
-/*     End of DLAMC5 */
-} /* dlamc5_ */
-double pow_di(double *ap, int *bp)
-    double pow, x;
-    int n;
-    pow = 1;
-    x = *ap;
-    n = *bp;
-    if(n != 0){
-       if(n < 0) {
-           n = -n;
-           x = 1/x;
-       }
-       for( ; ; ) {
-           if(n & 01) pow *= x;
-           if(n >>= 1) x *= x;
-           else break;
-       }
-    }
-    return(pow);
diff --git a/superlu/dlangs.c b/superlu/dlangs.c
deleted file mode 100644
index 811fb473..00000000
--- a/superlu/dlangs.c
+++ /dev/null
@@ -1,132 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  dlangs.c
- * History:     Modified from lapack routine DLANGE
- */
-#include <math.h>
-#include "slu_ddefs.h"
-double dlangs(char *norm, SuperMatrix *A)
-    Purpose   
-    =======   
-    DLANGS returns the value of the one norm, or the Frobenius norm, or 
-    the infinity norm, or the element of largest absolute value of a 
-    real matrix A.   
-    Description   
-    ===========   
-    DLANGE returns the value   
-       DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
-                (   
-                ( norm1(A),         NORM = '1', 'O' or 'o'   
-                (   
-                ( normI(A),         NORM = 'I' or 'i'   
-                (   
-                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
-    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
-    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
-    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
-    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
-    Arguments   
-    =========   
-    NORM    (input) CHARACTER*1   
-            Specifies the value to be returned in DLANGE as described above.   
-    A       (input) SuperMatrix*
-            The M by N sparse matrix A. 
-   ===================================================================== 
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    int      i, j, irow;
-    double   value, sum;
-    double   *rwork;
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
-       value = 0.;
-    } else if (lsame_(norm, "M")) {
-       /* Find max(abs(A(i,j))). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
-               value = SUPERLU_MAX( value, fabs( Aval[i]) );
-    } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
-       /* Find norm1(A). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j) {
-           sum = 0.;
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
-               sum += fabs(Aval[i]);
-           value = SUPERLU_MAX(value,sum);
-       }
-    } else if (lsame_(norm, "I")) {
-       /* Find normI(A). */
-       if ( !(rwork = (double *) SUPERLU_MALLOC(A->nrow * sizeof(double))) )
-           ABORT("SUPERLU_MALLOC fails for rwork.");
-       for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
-               irow = Astore->rowind[i];
-               rwork[irow] += fabs(Aval[i]);
-           }
-       value = 0.;
-       for (i = 0; i < A->nrow; ++i)
-           value = SUPERLU_MAX(value, rwork[i]);
-       SUPERLU_FREE (rwork);
-    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
-       /* Find normF(A). */
-       ABORT("Not implemented.");
-    } else
-       ABORT("Illegal norm specified.");
-    return (value);
-} /* dlangs */
diff --git a/superlu/dlaqgs.c b/superlu/dlaqgs.c
deleted file mode 100644
index 807a5c1c..00000000
--- a/superlu/dlaqgs.c
+++ /dev/null
@@ -1,158 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  dlaqgs.c
- * History:     Modified from LAPACK routine DLAQGE
- */
-#include <math.h>
-#include "slu_ddefs.h"
-dlaqgs(SuperMatrix *A, double *r, double *c, 
-       double rowcnd, double colcnd, double amax, char *equed)
-    Purpose   
-    =======   
-    DLAQGS equilibrates a general sparse M by N matrix A using the row and   
-    scaling factors in the vectors R and C.   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    A       (input/output) SuperMatrix*
-            On exit, the equilibrated matrix.  See EQUED for the form of 
-            the equilibrated matrix. The type of A can be:
-           Stype = NC; Dtype = SLU_D; Mtype = GE.
-    R       (input) double*, dimension (A->nrow)
-            The row scale factors for A.
-    C       (input) double*, dimension (A->ncol)
-            The column scale factors for A.
-    ROWCND  (input) double
-            Ratio of the smallest R(i) to the largest R(i).
-    COLCND  (input) double
-            Ratio of the smallest C(i) to the largest C(i).
-    AMAX    (input) double
-            Absolute value of largest matrix entry.
-    EQUED   (output) char*
-            Specifies the form of equilibration that was done.   
-            = 'N':  No equilibration   
-            = 'R':  Row equilibration, i.e., A has been premultiplied by  
-                    diag(R).   
-            = 'C':  Column equilibration, i.e., A has been postmultiplied  
-                    by diag(C).   
-            = 'B':  Both row and column equilibration, i.e., A has been
-                    replaced by diag(R) * A * diag(C).   
-    Internal Parameters   
-    ===================   
-    THRESH is a threshold value used to decide if row or column scaling   
-    should be done based on the ratio of the row or column scaling   
-    factors.  If ROWCND < THRESH, row scaling is done, and if   
-    COLCND < THRESH, column scaling is done.   
-    LARGE and SMALL are threshold values used to decide if row scaling   
-    should be done based on the absolute size of the largest matrix   
-    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
-    ===================================================================== 
-#define THRESH    (0.1)
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    int i, j, irow;
-    double large, small, cj;
-    extern double dlamch_(char *);
-    /* Quick return if possible */
-    if (A->nrow <= 0 || A->ncol <= 0) {
-       *(unsigned char *)equed = 'N';
-       return;
-    }
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Initialize LARGE and SMALL. */
-    small = dlamch_("Safe minimum") / dlamch_("Precision");
-    large = 1. / small;
-    if (rowcnd >= THRESH && amax >= small && amax <= large) {
-       if (colcnd >= THRESH)
-           *(unsigned char *)equed = 'N';
-       else {
-           /* Column scaling */
-           for (j = 0; j < A->ncol; ++j) {
-               cj = c[j];
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   Aval[i] *= cj;
-                }
-           }
-           *(unsigned char *)equed = 'C';
-       }
-    } else if (colcnd >= THRESH) {
-       /* Row scaling, no column scaling */
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               Aval[i] *= r[irow];
-           }
-       *(unsigned char *)equed = 'R';
-    } else {
-       /* Row and column scaling */
-       for (j = 0; j < A->ncol; ++j) {
-           cj = c[j];
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               Aval[i] *= cj * r[irow];
-           }
-       }
-       *(unsigned char *)equed = 'B';
-    }
-    return;
-} /* dlaqgs */
diff --git a/superlu/dmemory.c b/superlu/dmemory.c
deleted file mode 100644
index 4ca23363..00000000
--- a/superlu/dmemory.c
+++ /dev/null
@@ -1,690 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_ddefs.h"
-/* Constants */
-#define NO_MEMTYPE  4      /* 0: lusup;
-                             1: ucol;
-                             2: lsub;
-                             3: usub */
-#define GluIntArray(n)   (5 * (n) + 5)
-/* Internal prototypes */
-void  *dexpand (int *, MemType,int, int, GlobalLU_t *);
-int   dLUWorkInit (int, int, int, int **, double **, LU_space_t);
-void  copy_mem_double (int, void *, void *);
-void  dStackCompress (GlobalLU_t *);
-void  dSetupSpace (void *, int, LU_space_t *);
-void  *duser_malloc (int, int);
-void  duser_free (int, int);
-/* External prototypes (in memory.c - prec-indep) */
-extern void    copy_mem_int    (int, void *, void *);
-extern void    user_bcopy      (char *, char *, int);
-/* Headers for 4 types of dynamatically managed memory */
-typedef struct e_node {
-    int size;      /* length of the memory that has been used */
-    void *mem;     /* pointer to the new malloc'd store */
-} ExpHeader;
-typedef struct {
-    int  size;
-    int  used;
-    int  top1;  /* grow upward, relative to &array[0] */
-    int  top2;  /* grow downward */
-    void *array;
-} LU_stack_t;
-/* Variables local to this file */
-static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
-static LU_stack_t stack;
-static int no_expand;
-/* Macros to manipulate stack */
-#define StackFull(x)         ( x + stack.used >= stack.size )
-#define NotDoubleAlign(addr) ( (long int)addr & 7 )
-#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
-#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
-                             (w + 1) * m * sizeof(double) )
-#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
- * Setup the memory model to be used for factorization.
- *    lwork = 0: use system malloc;
- *    lwork > 0: use user-supplied work[] space.
- */
-void dSetupSpace(void *work, int lwork, LU_space_t *MemModel)
-    if ( lwork == 0 ) {
-       *MemModel = SYSTEM; /* malloc/free */
-    } else if ( lwork > 0 ) {
-       *MemModel = USER;   /* user provided space */
-       stack.used = 0;
-       stack.top1 = 0;
-       stack.top2 = (lwork/4)*4; /* must be word addressable */
-       stack.size = stack.top2;
-       stack.array = (void *) work;
-    }
-void *duser_malloc(int bytes, int which_end)
-    void *buf;
-    if ( StackFull(bytes) ) return (NULL);
-    if ( which_end == HEAD ) {
-       buf = (char*) stack.array + stack.top1;
-       stack.top1 += bytes;
-    } else {
-       stack.top2 -= bytes;
-       buf = (char*) stack.array + stack.top2;
-    }
-    stack.used += bytes;
-    return buf;
-void duser_free(int bytes, int which_end)
-    if ( which_end == HEAD ) {
-       stack.top1 -= bytes;
-    } else {
-       stack.top2 += bytes;
-    }
-    stack.used -= bytes;
- * mem_usage consists of the following fields:
- *    - for_lu (float)
- *      The amount of space used in bytes for the L\U data structures.
- *    - total_needed (float)
- *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
- */
-int dQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
-    SCformat *Lstore;
-    NCformat *Ustore;
-    register int n, iword, dword, panel_size = sp_ienv(1);
-    Lstore = L->Store;
-    Ustore = U->Store;
-    n = L->ncol;
-    iword = sizeof(int);
-    dword = sizeof(double);
-    /* For LU factors */
-    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
-                                dword + Lstore->rowind_colptr[n] * iword );
-    mem_usage->for_lu += (float)( (n + 1) * iword +
-                                Ustore->colptr[n] * (dword + iword) );
-    /* Working storage to support factorization */
-    mem_usage->total_needed = mem_usage->for_lu +
-       (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
-               (panel_size + 1) * n * dword );
-    mem_usage->expansions = --no_expand;
-    return 0;
-} /* dQuerySpace */
- * Allocate storage for the data structures common to all factor routines.
- * For those unpredictable size, make a guess as FILL * nnz(A).
- * Return value:
- *     If lwork = -1, return the estimated amount of space required, plus n;
- *     otherwise, return the amount of space actually allocated when
- *     memory allocation failure occurred.
- */
-dLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
-         int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
-         int **iwork, double **dwork)
-    int      info, iword, dword;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    int      *xsup, *supno;
-    int      *lsub, *xlsub;
-    double   *lusup;
-    int      *xlusup;
-    double   *ucol;
-    int      *usub, *xusub;
-    int      nzlmax, nzumax, nzlumax;
-    int      FILL = sp_ienv(6);
-    Glu->n    = n;
-    no_expand = 0;
-    iword     = sizeof(int);
-    dword     = sizeof(double);
-    if ( !expanders )  
-        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
-    if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
-    if ( fact != SamePattern_SameRowPerm ) {
-       /* Guess for L\U factors */
-       nzumax = nzlumax = FILL * annz;
-       nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else {
-           dSetupSpace(work, lwork, &Glu->MemModel);
-       }
-#if ( PRNTlevel >= 1 )
-       printf("dLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
-              FILL, nzlmax, nzumax);
-       fflush(stdout);
-       /* Integer pointers for L\U factors */
-       if ( Glu->MemModel == SYSTEM ) {
-           xsup   = intMalloc(n+1);
-           supno  = intMalloc(n+1);
-           xlsub  = intMalloc(n+1);
-           xlusup = intMalloc(n+1);
-           xusub  = intMalloc(n+1);
-       } else {
-           xsup   = (int *)duser_malloc((n+1) * iword, HEAD);
-           supno  = (int *)duser_malloc((n+1) * iword, HEAD);
-           xlsub  = (int *)duser_malloc((n+1) * iword, HEAD);
-           xlusup = (int *)duser_malloc((n+1) * iword, HEAD);
-           xusub  = (int *)duser_malloc((n+1) * iword, HEAD);
-       }
-       lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu );
-       ucol  = (double *) dexpand( &nzumax, UCOL, 0, 0, Glu );
-       lsub  = (int *)    dexpand( &nzlmax, LSUB, 0, 0, Glu );
-       usub  = (int *)    dexpand( &nzumax, USUB, 0, 1, Glu );
-       while ( !lusup || !ucol || !lsub || !usub ) {
-           if ( Glu->MemModel == SYSTEM ) {
-               SUPERLU_FREE(lusup); 
-               SUPERLU_FREE(ucol); 
-               SUPERLU_FREE(lsub); 
-               SUPERLU_FREE(usub);
-           } else {
-               duser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
-           }
-           nzlumax /= 2;
-           nzumax /= 2;
-           nzlmax /= 2;
-           if ( nzlumax < annz ) {
-               printf("Not enough memory to perform factorization.\n");
-               return (dmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-           }
-#if ( PRNTlevel >= 1)
-           printf("dLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
-                  nzlmax, nzumax);
-           fflush(stdout);
-           lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu );
-           ucol  = (double *) dexpand( &nzumax, UCOL, 0, 0, Glu );
-           lsub  = (int *)    dexpand( &nzlmax, LSUB, 0, 0, Glu );
-           usub  = (int *)    dexpand( &nzumax, USUB, 0, 1, Glu );
-       }
-    } else {
-       /* fact == SamePattern_SameRowPerm */
-       Lstore   = L->Store;
-       Ustore   = U->Store;
-       xsup     = Lstore->sup_to_col;
-       supno    = Lstore->col_to_sup;
-       xlsub    = Lstore->rowind_colptr;
-       xlusup   = Lstore->nzval_colptr;
-       xusub    = Ustore->colptr;
-       nzlmax   = Glu->nzlmax;    /* max from previous factorization */
-       nzumax   = Glu->nzumax;
-       nzlumax  = Glu->nzlumax;
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else if ( lwork == 0 ) {
-           Glu->MemModel = SYSTEM;
-       } else {
-           Glu->MemModel = USER;
-           stack.top2 = (lwork/4)*4; /* must be word-addressable */
-           stack.size = stack.top2;
-       }
-       lsub  = expanders[LSUB].mem  = Lstore->rowind;
-       lusup = expanders[LUSUP].mem = Lstore->nzval;
-       usub  = expanders[USUB].mem  = Ustore->rowind;
-       ucol  = expanders[UCOL].mem  = Ustore->nzval;;
-       expanders[LSUB].size         = nzlmax;
-       expanders[LUSUP].size        = nzlumax;
-       expanders[USUB].size         = nzumax;
-       expanders[UCOL].size         = nzumax;  
-    }
-    Glu->xsup    = xsup;
-    Glu->supno   = supno;
-    Glu->lsub    = lsub;
-    Glu->xlsub   = xlsub;
-    Glu->lusup   = lusup;
-    Glu->xlusup  = xlusup;
-    Glu->ucol    = ucol;
-    Glu->usub    = usub;
-    Glu->xusub   = xusub;
-    Glu->nzlmax  = nzlmax;
-    Glu->nzumax  = nzumax;
-    Glu->nzlumax = nzlumax;
-    info = dLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
-    if ( info )
-       return ( info + dmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-    ++no_expand;
-    return 0;
-} /* dLUMemInit */
-/* Allocate known working storage. Returns 0 if success, otherwise
-   returns the number of bytes allocated so far when failure occurred. */
-dLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
-            double **dworkptr, LU_space_t MemModel)
-    int    isize, dsize, extra;
-    double *old_ptr;
-    int    maxsuper = sp_ienv(3),
-           rowblk   = sp_ienv(4);
-    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
-    dsize = (m * panel_size +
-            NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(double);
-    if ( MemModel == SYSTEM ) 
-       *iworkptr = (int *) intCalloc(isize/sizeof(int));
-    else
-       *iworkptr = (int *) duser_malloc(isize, TAIL);
-    if ( ! *iworkptr ) {
-       fprintf(stderr, "dLUWorkInit: malloc fails for local iworkptr[]\n");
-       return (isize + n);
-    }
-    if ( MemModel == SYSTEM )
-       *dworkptr = (double *) SUPERLU_MALLOC(dsize);
-    else {
-       *dworkptr = (double *) duser_malloc(dsize, TAIL);
-       if ( NotDoubleAlign(*dworkptr) ) {
-           old_ptr = *dworkptr;
-           *dworkptr = (double*) DoubleAlign(*dworkptr);
-           *dworkptr = (double*) ((double*)*dworkptr - 1);
-           extra = (char*)old_ptr - (char*)*dworkptr;
-#ifdef DEBUG       
-           printf("dLUWorkInit: not aligned, extra %d\n", extra);
-           stack.top2 -= extra;
-           stack.used += extra;
-       }
-    }
-    if ( ! *dworkptr ) {
-       fprintf(stderr, "malloc fails for local dworkptr[].");
-       return (isize + dsize + n);
-    }
-    return 0;
- * Set up pointers for real working arrays.
- */
-dSetRWork(int m, int panel_size, double *dworkptr,
-        double **dense, double **tempv)
-    double zero = 0.0;
-    int maxsuper = sp_ienv(3),
-        rowblk   = sp_ienv(4);
-    *dense = dworkptr;
-    *tempv = *dense + panel_size*m;
-    dfill (*dense, m * panel_size, zero);
-    dfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
- * Free the working storage used by factor routines.
- */
-void dLUWorkFree(int *iwork, double *dwork, GlobalLU_t *Glu)
-    if ( Glu->MemModel == SYSTEM ) {
-       SUPERLU_FREE (iwork);
-       SUPERLU_FREE (dwork);
-    } else {
-       stack.used -= (stack.size - stack.top2);
-       stack.top2 = stack.size;
-/*     dStackCompress(Glu);  */
-    }
-    SUPERLU_FREE (expanders);  
-    expanders = 0;
-/* Expand the data structures for L and U during the factorization.
- * Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-dLUMemXpand(int jcol,
-          int next,          /* number of elements currently in the factors */
-          MemType mem_type,  /* which type of memory to expand  */
-          int *maxlen,       /* modified - maximum length of a data structure 
-          GlobalLU_t *Glu    /* modified - global LU data structures */
-          )
-    void   *new_mem;
-#ifdef DEBUG    
-    printf("dLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
-          jcol, next, *maxlen, mem_type);
-    if (mem_type == USUB) 
-       new_mem = dexpand(maxlen, mem_type, next, 1, Glu);
-    else
-       new_mem = dexpand(maxlen, mem_type, next, 0, Glu);
-    if ( !new_mem ) {
-       int    nzlmax  = Glu->nzlmax;
-       int    nzumax  = Glu->nzumax;
-       int    nzlumax = Glu->nzlumax;
-       fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
-       return (dmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
-    }
-    switch ( mem_type ) {
-      case LUSUP:
-       Glu->lusup   = (double *) new_mem;
-       Glu->nzlumax = *maxlen;
-       break;
-      case UCOL:
-       Glu->ucol   = (double *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-      case LSUB:
-       Glu->lsub   = (int *) new_mem;
-       Glu->nzlmax = *maxlen;
-       break;
-      case USUB:
-       Glu->usub   = (int *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-    }
-    return 0;
-copy_mem_double(int howmany, void *old, void *new)
-    register int i;
-    double *dold = old;
-    double *dnew = new;
-    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
- * Expand the existing storage to accommodate more fill-ins.
- */
-*dexpand (
-        int *prev_len,   /* length used from previous call */
-        MemType type,    /* which part of the memory to expand */
-        int len_to_copy, /* size of the memory to be copied to new store */
-        int keep_prev,   /* = 1: use prev_len;
-                            = 0: compute new_len to expand */
-        GlobalLU_t *Glu  /* modified - global LU data structures */
-       )
-    float    EXPAND = 1.5;
-    float    alpha;
-    void     *new_mem, *old_mem;
-    int      new_len, tries, lword, extra, bytes_to_copy;
-    alpha = EXPAND;
-    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
-        new_len = *prev_len;
-    else {
-       new_len = alpha * *prev_len;
-    }
-    if ( type == LSUB || type == USUB ) lword = sizeof(int);
-    else lword = sizeof(double);
-    if ( Glu->MemModel == SYSTEM ) {
-       new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-       if ( no_expand != 0 ) {
-           tries = 0;
-           if ( keep_prev ) {
-               if ( !new_mem ) return (NULL);
-           } else {
-               while ( !new_mem ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-               }
-           }
-           if ( type == LSUB || type == USUB ) {
-               copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
-           } else {
-               copy_mem_double(len_to_copy, expanders[type].mem, new_mem);
-           }
-           SUPERLU_FREE (expanders[type].mem);
-       }
-       expanders[type].mem = (void *) new_mem;
-    } else { /* MemModel == USER */
-       if ( no_expand == 0 ) {
-           new_mem = duser_malloc(new_len * lword, HEAD);
-           if ( NotDoubleAlign(new_mem) &&
-               (type == LUSUP || type == UCOL) ) {
-               old_mem = new_mem;
-               new_mem = (void *)DoubleAlign(new_mem);
-               extra = (char*)new_mem - (char*)old_mem;
-#ifdef DEBUG           
-               printf("expand(): not aligned, extra %d\n", extra);
-               stack.top1 += extra;
-               stack.used += extra;
-           }
-           expanders[type].mem = (void *) new_mem;
-       }
-       else {
-           tries = 0;
-           extra = (new_len - *prev_len) * lword;
-           if ( keep_prev ) {
-               if ( StackFull(extra) ) return (NULL);
-           } else {
-               while ( StackFull(extra) ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   extra = (new_len - *prev_len) * lword;          
-               }
-           }
-           if ( type != USUB ) {
-               new_mem = (void*)((char*)expanders[type + 1].mem + extra);
-               bytes_to_copy = (char*)stack.array + stack.top1
-                   - (char*)expanders[type + 1].mem;
-               user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
-               if ( type < USUB ) {
-                   Glu->usub = expanders[USUB].mem =
-                       (void*)((char*)expanders[USUB].mem + extra);
-               }
-               if ( type < LSUB ) {
-                   Glu->lsub = expanders[LSUB].mem =
-                       (void*)((char*)expanders[LSUB].mem + extra);
-               }
-               if ( type < UCOL ) {
-                   Glu->ucol = expanders[UCOL].mem =
-                       (void*)((char*)expanders[UCOL].mem + extra);
-               }
-               stack.top1 += extra;
-               stack.used += extra;
-               if ( type == UCOL ) {
-                   stack.top1 += extra;   /* Add same amount for USUB */
-                   stack.used += extra;
-               }
-           } /* if ... */
-       } /* else ... */
-    }
-    expanders[type].size = new_len;
-    *prev_len = new_len;
-    if ( no_expand ) ++no_expand;
-    return (void *) expanders[type].mem;
-} /* dexpand */
- * Compress the work[] array to remove fragmentation.
- */
-dStackCompress(GlobalLU_t *Glu)
-    register int iword, dword, ndim;
-    char    *last, *fragment;
-    int      *ifrom, *ito;
-    double   *dfrom, *dto;
-    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
-    double   *ucol, *lusup;
-    iword = sizeof(int);
-    dword = sizeof(double);
-    ndim = Glu->n;
-    xlsub  = Glu->xlsub;
-    lsub   = Glu->lsub;
-    xusub  = Glu->xusub;
-    usub   = Glu->usub;
-    xlusup = Glu->xlusup;
-    ucol   = Glu->ucol;
-    lusup  = Glu->lusup;
-    dfrom = ucol;
-    dto = (double *)((char*)lusup + xlusup[ndim] * dword);
-    copy_mem_double(xusub[ndim], dfrom, dto);
-    ucol = dto;
-    ifrom = lsub;
-    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
-    copy_mem_int(xlsub[ndim], ifrom, ito);
-    lsub = ito;
-    ifrom = usub;
-    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
-    copy_mem_int(xusub[ndim], ifrom, ito);
-    usub = ito;
-    last = (char*)usub + xusub[ndim] * iword;
-    fragment = (char*) (((char*)stack.array + stack.top1) - last);
-    stack.used -= (long int) fragment;
-    stack.top1 -= (long int) fragment;
-    Glu->ucol = ucol;
-    Glu->lsub = lsub;
-    Glu->usub = usub;
-#ifdef DEBUG
-    printf("dStackCompress: fragment %d\n", fragment);
-    /* for (last = 0; last < ndim; ++last)
-       print_lu_col("After compress:", last, 0);*/
- * Allocate storage for original matrix A
- */
-dallocateA(int n, int nnz, double **a, int **asub, int **xa)
-    *a    = (double *) doubleMalloc(nnz);
-    *asub = (int *) intMalloc(nnz);
-    *xa   = (int *) intMalloc(n+1);
-double *doubleMalloc(int n)
-    double *buf;
-    buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double)); 
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n");
-    }
-    return (buf);
-double *doubleCalloc(int n)
-    double *buf;
-    register int i;
-    double zero = 0.0;
-    buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n");
-    }
-    for (i = 0; i < n; ++i) buf[i] = zero;
-    return (buf);
-int dmemory_usage(const int nzlmax, const int nzumax, 
-                 const int nzlumax, const int n)
-    register int iword, dword;
-    iword   = sizeof(int);
-    dword   = sizeof(double);
-    return (10 * n * iword +
-           nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
diff --git a/superlu/dmyblas2.c b/superlu/dmyblas2.c
deleted file mode 100644
index c5dad344..00000000
--- a/superlu/dmyblas2.c
+++ /dev/null
@@ -1,246 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:          dmyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void dlsolve ( int ldm, int ncol, double *M, double *rhs )
-    int k;
-    double x0, x1, x2, x3, x4, x5, x6, x7;
-    double *M0;
-    register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-    M0 = &M[0];
-    while ( firstcol < ncol - 7 ) { /* Do 8 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-      Mki4 = Mki3 + ldm + 1;
-      Mki5 = Mki4 + ldm + 1;
-      Mki6 = Mki5 + ldm + 1;
-      Mki7 = Mki6 + ldm + 1;
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-      x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++;
-      x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++;
-      x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
-      x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
-                          - x6 * *Mki6++;
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      rhs[++firstcol] = x4;
-      rhs[++firstcol] = x5;
-      rhs[++firstcol] = x6;
-      rhs[++firstcol] = x7;
-      ++firstcol;
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-                       - x2 * *Mki2++ - x3 * *Mki3++
-                        - x4 * *Mki4++ - x5 * *Mki5++
-                       - x6 * *Mki6++ - x7 * *Mki7++;
-      M0 += 8 * ldm + 8;
-    }
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      ++firstcol;
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-                       - x2 * *Mki2++ - x3 * *Mki3++;
-      M0 += 4 * ldm + 4;
-    }
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      rhs[++firstcol] = x1;
-      ++firstcol;
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
-    }
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-dusolve ( ldm, ncol, M, rhs )
-int ldm;       /* in */
-int ncol;      /* in */
-double *M;     /* in */
-double *rhs;   /* modified */
-    double xj;
-    int jcol, j, irow;
-    jcol = ncol - 1;
-    for (j = 0; j < ncol; j++) {
-       xj = rhs[jcol] / M[jcol + jcol*ldm];            /* M(jcol, jcol) */
-       rhs[jcol] = xj;
-       for (irow = 0; irow < jcol; irow++)
-           rhs[irow] -= xj * M[irow + jcol*ldm];       /* M(irow, jcol) */
-       jcol--;
-    }
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void dmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm;       /* in -- leading dimension of M */
-int nrow;      /* in */ 
-int ncol;      /* in */
-double *M;     /* in */
-double *vec;   /* in */
-double *Mxvec; /* in/out */
-    double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
-    double *M0;
-    register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-    int k;
-    M0 = &M[0];
-    while ( firstcol < ncol - 7 ) {    /* Do 8 columns */
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-        Mki2 = Mki1 + ldm;
-        Mki3 = Mki2 + ldm;
-       Mki4 = Mki3 + ldm;
-       Mki5 = Mki4 + ldm;
-       Mki6 = Mki5 + ldm;
-       Mki7 = Mki6 + ldm;
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       vi4 = vec[firstcol++];
-       vi5 = vec[firstcol++];
-       vi6 = vec[firstcol++];
-       vi7 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) 
-           Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-                     + vi2 * *Mki2++ + vi3 * *Mki3++ 
-                     + vi4 * *Mki4++ + vi5 * *Mki5++
-                     + vi6 * *Mki6++ + vi7 * *Mki7++;
-       M0 += 8 * ldm;
-    }
-    while ( firstcol < ncol - 3 ) {    /* Do 4 columns */
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-       Mki2 = Mki1 + ldm;
-       Mki3 = Mki2 + ldm;
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) 
-           Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-                     + vi2 * *Mki2++ + vi3 * *Mki3++ ;
-       M0 += 4 * ldm;
-    }
-    while ( firstcol < ncol ) {                /* Do 1 column */
-       Mki0 = M0;
-       vi0 = vec[firstcol++];
-       for (k = 0; k < nrow; k++)
-           Mxvec[k] += vi0 * *Mki0++;
-       M0 += ldm;
-    }
diff --git a/superlu/dpanel_bmod.c b/superlu/dpanel_bmod.c
deleted file mode 100644
index 0e1cc00a..00000000
--- a/superlu/dpanel_bmod.c
+++ /dev/null
@@ -1,449 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-extern void dtrsv_();
-extern void dgemv_();
- * Function prototypes 
- */
-void dlsolve(int, int, double *, double *);
-void dmatvec(int, int, int, double *, double *, double *);
-extern void dcheck_tempv();
-dpanel_bmod (
-           const int  m,          /* in - number of rows in the matrix */
-           const int  w,          /* in */
-           const int  jcol,       /* in */
-           const int  nseg,       /* in */
-           double     *dense,     /* out, of size n by w */
-           double     *tempv,     /* working array */
-           int        *segrep,    /* in */
-           int        *repfnz,    /* in, of size n by w */
-           GlobalLU_t *Glu,       /* modified */
-           SuperLUStat_t *stat    /* output */
-           )
- * Purpose
- * =======
- *
- *    Performs numeric block updates (sup-panel) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- *    Before entering this routine, the original nonzeros in the panel 
- *    were already copied into the spa[m,w].
- *
- *    Updated/Output parameters-
- *     dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
- *      collectively in the m-by-w vector dense[*]. 
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-    int          incx = 1, incy = 1;
-    double       alpha, beta;
-    register int k, ksub;
-    int          fsupc, nsupc, nsupr, nrow;
-    int          krep, krep_ind;
-    double       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          segsze;
-    int          block_nrow;  /* no of rows in a block row */
-    register int lptr;       /* Points to the row subscripts of a supernode */
-    int          kfnz, irow, no_zeros; 
-    register int isub, isub1, i;
-    register int jj;         /* Index through each column in the panel */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    double       *lusup;
-    int          *xlusup;
-    int          *repfnz_col; /* repfnz[] for a column in the panel */
-    double       *dense_col;  /* dense[] for a column in the panel */
-    double       *tempv1;             /* Used in 1-D update */
-    double       *TriTmp, *MatvecTmp; /* used in 2-D update */
-    double      zero = 0.0;
-    double      one = 1.0;
-    register int ldaTmp;
-    register int r_ind, r_hi;
-    static   int first = 1, maxsuper, rowblk, colblk;
-    flops_t  *ops = stat->ops;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       rowblk   = sp_ienv(4);
-       colblk   = sp_ienv(5);
-       first = 0;
-    }
-    ldaTmp = maxsuper + rowblk;
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
-       /* krep = representative of current k-th supernode
-        * fsupc = first supernodal column
-        * nsupc = no of columns in a supernode
-        * nsupr = no of rows in a supernode
-        */
-        krep = segrep[k--];
-       fsupc = xsup[supno[krep]];
-       nsupc = krep - fsupc + 1;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nrow = nsupr - nsupc;
-       lptr = xlsub[fsupc];
-       krep_ind = lptr + nsupc - 1;
-       repfnz_col = repfnz;
-       dense_col = dense;
-       if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
-           TriTmp = tempv;
-           /* Sequence through each column in panel -- triangular solves */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-               ops[TRSV] += segsze * (segsze - 1);
-               ops[GEMV] += 2 * nrow * segsze;
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       dense_col[irow] -= ukj * lusup[luptr];
-                       ++luptr;
-                   }
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   luptr1 = luptr - nsupr;
-                   if ( segsze == 2 ) {
-                       ukj -= ukj1 * lusup[luptr1];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++;
-                           dense_col[irow] -= (ukj*lusup[luptr]
-                                               + ukj1*lusup[luptr1]);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       ukj1 -= ukj2 * lusup[luptr2-1];
-                       ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++; luptr2++;
-                           dense_col[irow] -= ( ukj*lusup[luptr]
-                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                       }
-                   }
-               } else  {       /* segsze >= 4 */
-                   /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
-                      holds the result of triangular solves.    */
-                   no_zeros = kfnz - fsupc;
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       TriTmp[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-#ifdef _CRAY
-                   STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-                   dtrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-                   dlsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
-               } /* else ... */
-           }  /* for jj ... end tri-solves */
-           /* Block row updates; push all the way into dense[*] block */
-           for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
-               r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
-               block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
-               luptr = xlusup[fsupc] + nsupc + r_ind;
-               isub1 = lptr + nsupc + r_ind;
-               repfnz_col = repfnz;
-               TriTmp = tempv;
-               dense_col = dense;
-               /* Sequence through each column in panel -- matrix-vector */
-               for (jj = jcol; jj < jcol + w; jj++,
-                    repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-                   kfnz = repfnz_col[krep];
-                   if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-                   segsze = krep - kfnz + 1;
-                   if ( segsze <= 3 ) continue;   /* skip unrolled cases */
-                   /* Perform a block update, and scatter the result of
-                      matrix-vector to dense[].                 */
-                   no_zeros = kfnz - fsupc;
-                   luptr1 = luptr + nsupr * no_zeros;
-                   MatvecTmp = &TriTmp[maxsuper];
-                   alpha = one; 
-                    beta = zero;
-#ifdef _CRAY
-                   SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-                   dgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-                   dmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
-                          TriTmp, MatvecTmp);
-                   /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
-                    * such that MatvecTmp[*] can be re-used for the
-                    * the next blok row update. dense[] will be copied into 
-                    * global store after the whole panel has been finished.
-                    */
-                   isub = isub1;
-                   for (i = 0; i < block_nrow; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] -= MatvecTmp[i];
-                       MatvecTmp[i] = zero;
-                       ++isub;
-                   }
-               } /* for jj ... */
-           } /* for each block row ... */
-           /* Scatter the triangular solves into SPA dense[*] */
-           repfnz_col = repfnz;
-           TriTmp = tempv;
-           dense_col = dense;
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               if ( segsze <= 3 ) continue; /* skip unrolled cases */
-               no_zeros = kfnz - fsupc;                
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   dense_col[irow] = TriTmp[i];
-                   TriTmp[i] = zero;
-                   ++isub;
-               }
-           } /* for jj ... */
-       } else { /* 1-D block modification */
-           /* Sequence through each column in the panel */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-               ops[TRSV] += segsze * (segsze - 1);
-               ops[GEMV] += 2 * nrow * segsze;
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       dense_col[irow] -= ukj * lusup[luptr];
-                       ++luptr;
-                   }
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr1 = luptr - nsupr;
-                   if ( segsze == 2 ) {
-                       ukj -= ukj1 * lusup[luptr1];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr;  ++luptr1;
-                           dense_col[irow] -= (ukj*lusup[luptr]
-                                               + ukj1*lusup[luptr1]);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       ukj1 -= ukj2 * lusup[luptr2-1];
-                       ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr; ++luptr1; ++luptr2;
-                           dense_col[irow] -= ( ukj*lusup[luptr]
-                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                       }
-                   }
-               } else  { /* segsze >= 4 */
-                   /* 
-                    * Perform a triangular solve and block update,
-                    * then scatter the result of sup-col update to dense[].
-                    */
-                   no_zeros = kfnz - fsupc;
-                   /* Copy U[*,j] segment from dense[*] to tempv[*]: 
-                    *    The result of triangular solve is in tempv[*];
-                    *    The result of matrix vector update is in dense_col[*]
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       tempv[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-#ifdef _CRAY
-                   STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-                   dtrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-                   luptr += segsze;    /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                    alpha = one;
-                    beta = zero;
-#ifdef _CRAY
-                   SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-                   dgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-                   dlsolve ( nsupr, segsze, &lusup[luptr], tempv );
-                   luptr += segsze;        /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                   dmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
-                   /* Scatter tempv[*] into SPA dense[*] temporarily, such
-                    * that tempv[*] can be used for the triangular solve of
-                    * the next column of the panel. They will be copied into 
-                    * ucol[*] after the whole panel has been finished.
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] = tempv[i];
-                       tempv[i] = zero;
-                       isub++;
-                   }
-                   /* Scatter the update from tempv1[*] into SPA dense[*] */
-                   /* Start dense rectangular L */
-                   for (i = 0; i < nrow; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] -= tempv1[i];
-                       tempv1[i] = zero;
-                       ++isub; 
-                   }
-               } /* else segsze>=4 ... */
-           } /* for each column in the panel... */
-       } /* else 1-D update ... */
-    } /* for each updating supernode ... */
diff --git a/superlu/dpanel_dfs.c b/superlu/dpanel_dfs.c
deleted file mode 100644
index 75783bc5..00000000
--- a/superlu/dpanel_dfs.c
+++ /dev/null
@@ -1,256 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_ddefs.h"
-dpanel_dfs (
-          const int  m,           /* in - number of rows in the matrix */
-          const int  w,           /* in */
-          const int  jcol,        /* in */
-          SuperMatrix *A,       /* in - original matrix */
-          int        *perm_r,     /* in */
-          int        *nseg,       /* out */
-          double     *dense,      /* out */
-          int        *panel_lsub, /* out */
-          int        *segrep,     /* out */
-          int        *repfnz,     /* out */
-          int        *xprune,     /* out */
-          int        *marker,     /* out */     
-          int        *parent,     /* working array */
-          int        *xplore,     /* working array */
-          GlobalLU_t *Glu         /* modified */
-          )
- * Purpose
- * =======
- *
- *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives.
- *
- *   The routine returns one list of the supernodal representatives
- *   in topological order of the dfs that generates them. This list is
- *   a superset of the topological order of each individual column within
- *   the panel. 
- *   The location of the first nonzero in each supernodal segment
- *   (supernodal entry location) is also returned. Each column has a 
- *   separate list for this purpose.
- *
- *   Two marker arrays are used for dfs:
- *     marker[i] == jj, if i was visited during dfs of current column jj;
- *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
- *
- *   marker: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- */
-    NCPformat *Astore;
-    double    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int              krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
-    int       k, krow, kmark, kperm;
-    int       xdfs, maxdfs, kpar;
-    int       jj;         /* index through each column in the panel */
-    int       *marker1;           /* marker1[jj] >= jcol if vertex jj was 
-                             by a previous column within this panel.   */
-    int       *repfnz_col; /* start of each column in the panel */
-    double    *dense_col;  /* start of each column in the panel */
-    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    /* Initialize pointers */
-    Astore     = A->Store;
-    a          = Astore->nzval;
-    asub       = Astore->rowind;
-    xa_begin   = Astore->colbeg;
-    xa_end     = Astore->colend;
-    marker1    = marker + m;
-    repfnz_col = repfnz;
-    dense_col  = dense;
-    *nseg      = 0;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    /* For each column in the panel */
-    for (jj = jcol; jj < jcol + w; jj++) {
-       nextl_col = (jj - jcol) * m;
-#ifdef CHK_DFS
-       printf("\npanel col %d: ", jj);
-       /* For each nonz in A[*,jj] do dfs */
-       for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
-           krow = asub[k];
-            dense_col[krow] = a[k];
-           kmark = marker[krow];       
-           if ( kmark == jj ) 
-               continue;     /* krow visited before, go to the next nonzero */
-           /* For each unmarked nbr krow of jj
-            * krow is in L: place it in structure of L[*,jj]
-            */
-           marker[krow] = jj;
-           kperm = perm_r[krow];
-           if ( kperm == EMPTY ) {
-               panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
-           }
-           /* 
-            * krow is in U: if its supernode-rep krep
-            * has been explored, update repfnz[*]
-            */
-           else {
-               krep = xsup[supno[kperm]+1] - 1;
-               myfnz = repfnz_col[krep];
-#ifdef CHK_DFS
-               printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, 
-               if ( myfnz != EMPTY ) { /* Representative visited before */
-                   if ( myfnz > kperm ) repfnz_col[krep] = kperm;
-                   /* continue; */
-               }
-               else {
-                   /* Otherwise, perform dfs starting at krep */
-                   oldrep = EMPTY;
-                   parent[krep] = oldrep;
-                   repfnz_col[krep] = kperm;
-                   xdfs = xlsub[krep];
-                   maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                   printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
-                   for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                   printf("\n");
-                   do {
-                       /* 
-                        * For each unmarked kchild of krep 
-                        */
-                       while ( xdfs < maxdfs ) {
-                           kchild = lsub[xdfs];
-                           xdfs++;
-                           chmark = marker[kchild];
-                           if ( chmark != jj ) { /* Not reached yet */
-                               marker[kchild] = jj;
-                               chperm = perm_r[kchild];
-                               /* Case kchild is in L: place it in L[*,j] */
-                               if ( chperm == EMPTY ) {
-                                   panel_lsub[nextl_col++] = kchild;
-                               } 
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               else {
-                                   chrep = xsup[supno[chperm]+1] - 1;
-                                   myfnz = repfnz_col[chrep];
-#ifdef CHK_DFS
-                                   printf("chrep %d,myfnz %d,perm_r[%d] 
-                                   if ( myfnz != EMPTY ) { /* Visited before */
-                                       if ( myfnz > chperm )
-                                           repfnz_col[chrep] = chperm;
-                                   }
-                                   else {
-                                       /* Cont. dfs at snode-rep of kchild */
-                                       xplore[krep] = xdfs;    
-                                       oldrep = krep;
-                                       krep = chrep; /* Go deeper down G(L) */
-                                       parent[krep] = oldrep;
-                                       repfnz_col[krep] = chperm;
-                                       xdfs = xlsub[krep];     
-                                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                                       printf("  xdfs %d, maxdfs %d: ", xdfs, 
-                                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                                       printf("\n");
-                                   } /* else */
-                               } /* else */
-                           } /* if... */
-                       } /* while xdfs < maxdfs */
-                       /* krow has no more unexplored nbrs:
-                        *    Place snode-rep krep in postorder DFS, if this 
-                        *    segment is seen for the first time. (Note that
-                        *    "repfnz[krep]" may change later.)
-                        *    Backtrack dfs to its parent.
-                        */
-                       if ( marker1[krep] < jcol ) {
-                           segrep[*nseg] = krep;
-                           ++(*nseg);
-                           marker1[krep] = jj;
-                       }
-                       kpar = parent[krep]; /* Pop stack, mimic recursion */
-                       if ( kpar == EMPTY ) break; /* dfs done */
-                       krep = kpar;
-                       xdfs = xplore[krep];
-                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                       printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", 
-                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                       printf("\n");
-                   } while ( kpar != EMPTY ); /* do-while - until empty stack 
-               } /* else */
-           } /* else */
-       } /* for each nonz in A[*,jj] */
-       repfnz_col += m;    /* Move to next column */
-        dense_col += m;
-    } /* for jj ... */
diff --git a/superlu/dpivotL.c b/superlu/dpivotL.c
deleted file mode 100644
index 6ba206b1..00000000
--- a/superlu/dpivotL.c
+++ /dev/null
@@ -1,170 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <math.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-#undef DEBUG
-        const int  jcol,     /* in */
-        const double u,      /* in - diagonal pivoting threshold */
-        int        *usepr,   /* re-use the pivot sequence given by perm_r/iperm_r */
-        int        *perm_r,  /* may be modified */
-        int        *iperm_r, /* in - inverse of perm_r */
-        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
-        int        *pivrow,  /* out */
-        GlobalLU_t *Glu,     /* modified - global LU data structures */
-       SuperLUStat_t *stat  /* output */
-       )
- * Purpose
- * =======
- *   Performs the numerical pivoting on the current column of L,
- *   and the CDIV operation.
- *
- *   Pivot policy:
- *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
- *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
- *           pivot row = k;
- *       ELSE IF abs(A_jj) >= thresh THEN
- *           pivot row = j;
- *       ELSE
- *           pivot row = m;
- * 
- *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
- *
- *   Return value: 0      success;
- *                 i > 0  U(i,i) is exactly zero.
- *
- */
-    int          fsupc;            /* first column in the supernode */
-    int          nsupc;            /* no of columns in the supernode */
-    int          nsupr;     /* no of rows in the supernode */
-    int          lptr;     /* points to the starting subscript of the supernode */
-    int          pivptr, old_pivptr, diag, diagind;
-    double       pivmax, rtemp, thresh;
-    double       temp;
-    double       *lu_sup_ptr; 
-    double       *lu_col_ptr;
-    int          *lsub_ptr;
-    int          isub, icol, k, itemp;
-    int          *lsub, *xlsub;
-    double       *lusup;
-    int          *xlusup;
-    flops_t      *ops = stat->ops;
-    /* Initialize pointers */
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
-    nsupc      = jcol - fsupc;         /* excluding jcol; nsupc >= 0 */
-    lptr       = xlsub[fsupc];
-    nsupr      = xlsub[fsupc+1] - lptr;
-    lu_sup_ptr = &lusup[xlusup[fsupc]];        /* start of the current supernode */
-    lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
-    lsub_ptr   = &lsub[lptr];  /* start of row indices of the supernode */
-#ifdef DEBUG
-if ( jcol == MIN_COL ) {
-    printf("Before cdiv: col %d\n", jcol);
-    for (k = nsupc; k < nsupr; k++) 
-       printf("  lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
-    /* Determine the largest abs numerical value for partial pivoting;
-       Also search for user-specified pivot, and diagonal element. */
-    if ( *usepr ) *pivrow = iperm_r[jcol];
-    diagind = iperm_c[jcol];
-    pivmax = 0.0;
-    pivptr = nsupc;
-    diag = EMPTY;
-    old_pivptr = nsupc;
-    for (isub = nsupc; isub < nsupr; ++isub) {
-       rtemp = fabs (lu_col_ptr[isub]);
-       if ( rtemp > pivmax ) {
-           pivmax = rtemp;
-           pivptr = isub;
-       }
-       if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
-       if ( lsub_ptr[isub] == diagind ) diag = isub;
-    }
-    /* Test for singularity */
-    if ( pivmax == 0.0 ) {
-       *pivrow = lsub_ptr[pivptr];
-       perm_r[*pivrow] = jcol;
-       *usepr = 0;
-       return (jcol+1);
-    }
-    thresh = u * pivmax;
-    /* Choose appropriate pivotal element by our policy. */
-    if ( *usepr ) {
-        rtemp = fabs (lu_col_ptr[old_pivptr]);
-       if ( rtemp != 0.0 && rtemp >= thresh )
-           pivptr = old_pivptr;
-       else
-           *usepr = 0;
-    }
-    if ( *usepr == 0 ) {
-       /* Use diagonal pivot? */
-       if ( diag >= 0 ) { /* diagonal exists */
-           rtemp = fabs (lu_col_ptr[diag]);
-           if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
-        }
-       *pivrow = lsub_ptr[pivptr];
-    }
-    /* Record pivot row */
-    perm_r[*pivrow] = jcol;
-    /* Interchange row subscripts */
-    if ( pivptr != nsupc ) {
-       itemp = lsub_ptr[pivptr];
-       lsub_ptr[pivptr] = lsub_ptr[nsupc];
-       lsub_ptr[nsupc] = itemp;
-       /* Interchange numerical values as well, for the whole snode, such 
-        * that L is indexed the same way as A.
-        */
-       for (icol = 0; icol <= nsupc; icol++) {
-           itemp = pivptr + icol * nsupr;
-           temp = lu_sup_ptr[itemp];
-           lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
-           lu_sup_ptr[nsupc + icol*nsupr] = temp;
-       }
-    } /* if */
-    /* cdiv operation */
-    ops[FACT] += nsupr - nsupc;
-    temp = 1.0 / lu_col_ptr[nsupc];
-    for (k = nsupc+1; k < nsupr; k++) 
-       lu_col_ptr[k] *= temp;
-    return 0;
diff --git a/superlu/dpivotgrowth.c b/superlu/dpivotgrowth.c
deleted file mode 100644
index 6e641199..00000000
--- a/superlu/dpivotgrowth.c
+++ /dev/null
@@ -1,129 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <math.h>
-#include "slu_ddefs.h"
-dPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
-             SuperMatrix *L, SuperMatrix *U)
- * Purpose
- * =======
- *
- * Compute the reciprocal pivot growth factor of the leading ncols columns
- * of the matrix, using the formula:
- *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
- *
- * Arguments
- * =========
- *
- * ncols    (input) int
- *          The number of columns of matrices A, L and U.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = NC; Dtype = SLU_D; Mtype = GE.
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SC; Dtype = SLU_D; Mtype = TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = NC;
- *          Dtype = SLU_D; Mtype = TRU.
- *
- */
-    NCformat *Astore;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double  *Aval, *Lval, *Uval;
-    int      fsupc, nsupr, luptr, nz_in_U;
-    int      i, j, k, oldcol;
-    int      *inv_perm_c;
-    double   rpg, maxaj, maxuj;
-    extern   double dlamch_(char *);
-    double   smlnum;
-    double   *luval;
-    /* Get machine constants. */
-    smlnum = dlamch_("S");
-    rpg = 1. / smlnum;
-    Astore = A->Store;
-    Lstore = L->Store;
-    Ustore = U->Store;
-    Aval = Astore->nzval;
-    Lval = Lstore->nzval;
-    Uval = Ustore->nzval;
-    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
-    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;
-    for (k = 0; k <= Lstore->nsuper; ++k) {
-       fsupc = L_FST_SUPC(k);
-       nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-       luptr = L_NZ_START(fsupc);
-       luval = &Lval[luptr];
-       nz_in_U = 1;
-       for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
-           maxaj = 0.;
-            oldcol = inv_perm_c[j];
-           for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
-               maxaj = SUPERLU_MAX( maxaj, fabs(Aval[i]) );
-           maxuj = 0.;
-           for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
-               maxuj = SUPERLU_MAX( maxuj, fabs(Uval[i]) );
-           /* Supernode */
-           for (i = 0; i < nz_in_U; ++i)
-               maxuj = SUPERLU_MAX( maxuj, fabs(luval[i]) );
-           ++nz_in_U;
-           luval += nsupr;
-           if ( maxuj == 0. )
-               rpg = SUPERLU_MIN( rpg, 1.);
-           else
-               rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
-       }
-       if ( j >= ncols ) break;
-    }
-    SUPERLU_FREE(inv_perm_c);
-    return (rpg);
diff --git a/superlu/dpruneL.c b/superlu/dpruneL.c
deleted file mode 100644
index 670d061c..00000000
--- a/superlu/dpruneL.c
+++ /dev/null
@@ -1,156 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_ddefs.h"
-       const int  jcol,             /* in */
-       const int  *perm_r,   /* in */
-       const int  pivrow,    /* in */
-       const int  nseg,             /* in */
-       const int  *segrep,   /* in */
-       const int  *repfnz,   /* in */
-       int        *xprune,   /* out */
-       GlobalLU_t *Glu       /* modified - global LU data structures */
-       )
- * Purpose
- * =======
- *   Prunes the L-structure of supernodes whose L-structure
- *   contains the current pivot row "pivrow"
- *
- */
-    double     utemp;
-    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
-    int        i, ktemp, minloc, maxloc;
-    int        do_prune; /* logical variable */
-    int        *xsup, *supno;
-    int        *lsub, *xlsub;
-    double     *lusup;
-    int        *xlusup;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    /*
-     * For each supernode-rep irep in U[*,j]
-     */
-    jsupno = supno[jcol];
-    for (i = 0; i < nseg; i++) {
-       irep = segrep[i];
-       irep1 = irep + 1;
-       do_prune = FALSE;
-       /* Don't prune with a zero U-segment */
-       if ( repfnz[irep] == EMPTY )
-               continue;
-       /* If a snode overlaps with the next panel, then the U-segment 
-        * is fragmented into two parts -- irep and irep1. We should let
-        * pruning occur at the rep-column in irep1's snode. 
-        */
-       if ( supno[irep] == supno[irep1] )      /* Don't prune */
-               continue;
-       /*
-        * If it has not been pruned & it has a nonz in row L[pivrow,i]
-        */
-       if ( supno[irep] != jsupno ) {
-           if ( xprune[irep] >= xlsub[irep1] ) {
-               kmin = xlsub[irep];
-               kmax = xlsub[irep1] - 1;
-               for (krow = kmin; krow <= kmax; krow++) 
-                   if ( lsub[krow] == pivrow ) {
-                       do_prune = TRUE;
-                       break;
-                   }
-           }
-           if ( do_prune ) {
-               /* Do a quicksort-type partition
-                * movnum=TRUE means that the num values have to be exchanged.
-                */
-               movnum = FALSE;
-               if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
-                       movnum = TRUE;
-               while ( kmin <= kmax ) {
-                   if ( perm_r[lsub[kmax]] == EMPTY ) 
-                       kmax--;
-                   else if ( perm_r[lsub[kmin]] != EMPTY )
-                       kmin++;
-                   else { /* kmin below pivrow, and kmax above pivrow: 
-                           *   interchange the two subscripts
-                           */
-                       ktemp = lsub[kmin];
-                       lsub[kmin] = lsub[kmax];
-                       lsub[kmax] = ktemp;
-                       /* If the supernode has only one column, then we
-                        * only keep one set of subscripts. For any subscript 
-                        * interchange performed, similar interchange must be 
-                        * done on the numerical values.
-                        */
-                       if ( movnum ) {
-                           minloc = xlusup[irep] + (kmin - xlsub[irep]);
-                           maxloc = xlusup[irep] + (kmax - xlsub[irep]);
-                           utemp = lusup[minloc];
-                           lusup[minloc] = lusup[maxloc];
-                           lusup[maxloc] = utemp;
-                       }
-                       kmin++;
-                       kmax--;
-                   }
-               } /* while */
-               xprune[irep] = kmin;    /* Pruning */
-#ifdef CHK_PRUNE
-       printf("    After dpruneL(),using col %d:  xprune[%d] = %d\n", 
-                       jcol, irep, kmin);
-           } /* if do_prune */
-       } /* if */
-    } /* for each U-segment... */
diff --git a/superlu/dreadhb.c b/superlu/dreadhb.c
deleted file mode 100644
index fb22c543..00000000
--- a/superlu/dreadhb.c
+++ /dev/null
@@ -1,277 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-/* Eat up the rest of the current line */
-int dDumpLine(FILE *fp)
-    register int c;
-    while ((c = fgetc(fp)) != '\n') ;
-    return 0;
-int dParseIntFormat(char *buf, int *num, int *size)
-    char *tmp;
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    sscanf(tmp, "%d", num);
-    while (*tmp != 'I' && *tmp != 'i') ++tmp;
-    ++tmp;
-    sscanf(tmp, "%d", size);
-    return 0;
-int dParseFloatFormat(char *buf, int *num, int *size)
-    char *tmp, *period;
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-    while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd'
-          && *tmp != 'F' && *tmp != 'f') {
-        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
-           num picked up refers to P, which should be skipped. */
-        if (*tmp=='p' || *tmp=='P') {
-           ++tmp;
-           *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-        } else {
-           ++tmp;
-        }
-    }
-    ++tmp;
-    period = tmp;
-    while (*period != '.' && *period != ')') ++period ;
-    *period = '\0';
-    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/
-    return 0;
-int dReadVector(FILE *fp, int n, int *where, int perline, int persize)
-    register int i, j, item;
-    char tmp, buf[100], *dummy;
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           item = atoi(&buf[j*persize]); 
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-           where[i++] = item - 1;
-       }
-    }
-    return 0;
-int dReadValues(FILE *fp, int n, double *destination, int perline, int persize)
-    register int i, j, k, s;
-    char tmp, buf[100], *dummy;
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           s = j*persize;
-           for (k = 0; k < persize; ++k) /* No D_ format in C */
-               if ( buf[s+k] == 'D' || buf[s+k] == 'd' ) buf[s+k] = 'E';
-           destination[i++] = atof(&buf[s]);
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-       }
-    }
-    return 0;
-dreadhb(int *nrow, int *ncol, int *nonz,
-       double **nzval, int **rowind, int **colptr)
- * Purpose
- * =======
- * 
- * Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format 
- * as described below.
- * 
- * Line 1 (A72,A8) 
- *     Col. 1 - 72   Title (TITLE) 
- *     Col. 73 - 80  Key (KEY) 
- * 
- * Line 2 (5I14) 
- *     Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
- *     Col. 15 - 28  Number of lines for pointers (PTRCRD) 
- *     Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
- *     Col. 43 - 56  Number of lines for numerical values (VALCRD) 
- *     Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
- *                    (including starting guesses and solution vectors 
- *                    if present) 
- *                   (zero indicates no right-hand side data is present) 
- *
- * Line 3 (A3, 11X, 4I14) 
- *     Col. 1 - 3    Matrix type (see below) (MXTYPE) 
- *     Col. 15 - 28  Number of rows (or variables) (NROW) 
- *     Col. 29 - 42  Number of columns (or elements) (NCOL) 
- *     Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
- *                   (equal to number of entries for assembled matrices) 
- *     Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
- *                   (zero in the case of assembled matrices) 
- * Line 4 (2A16, 2A20) 
- *     Col. 1 - 16   Format for pointers (PTRFMT) 
- *     Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
- *     Col. 33 - 52  Format for numerical values of coefficient matrix 
- *     Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
- *
- * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
- *     Col. 1        Right-hand side type: 
- *                       F for full storage or M for same format as matrix 
- *     Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
- *     Col. 3        X if an exact solution vector(s) is supplied. 
- *     Col. 15 - 28  Number of right-hand sides (NRHS) 
- *     Col. 29 - 42  Number of row indices (NRHSIX) 
- *                   (ignored in case of unassembled matrices) 
- *
- * The three character type field on line 3 describes the matrix type. 
- * The following table lists the permitted values for each of the three 
- * characters. As an example of the type field, RSA denotes that the matrix 
- * is real, symmetric, and assembled. 
- *
- * First Character: 
- *     R Real matrix 
- *     C Complex matrix 
- *     P Pattern only (no numerical values supplied) 
- *
- * Second Character: 
- *     S Symmetric 
- *     U Unsymmetric 
- *     H Hermitian 
- *     Z Skew symmetric 
- *     R Rectangular 
- *
- * Third Character: 
- *     A Assembled 
- *     E Elemental matrices (unassembled) 
- *
- */
-    register int i, numer_lines = 0, rhscrd = 0;
-    int tmp, colnum, colsize, rownum, rowsize, valnum, valsize, dummy;
-    char buf[100], type[4], key[10], *dummyc;
-    FILE *fp;
-    fp = stdin;
-    /* Line 1 */
-    dummyc = fgets(buf, 100, fp);
-    fputs(buf, stdout);
-#if 0
-    dummy = fscanf(fp, "%72c", buf); buf[72] = 0;
-    printf("Title: %s", buf);
-    dummy += fscanf(fp, "%8c", key);  key[8] = 0;
-    printf("Key: %s\n", key);
-    dDumpLine(fp);
-    /* Line 2 */
-    for (i=0; i<5; i++) {
-       dummy += fscanf(fp, "%14c", buf); buf[14] = 0;
-       sscanf(buf, "%d", &tmp);
-       if (i == 3) numer_lines = tmp;
-       if (i == 4 && tmp) rhscrd = tmp;
-    }
-    dDumpLine(fp);
-    /* Line 3 */
-    dummy += fscanf(fp, "%3c", type);
-    dummy += fscanf(fp, "%11c", buf); /* pad */
-    type[3] = 0;
-#ifdef DEBUG
-    printf("Matrix type %s\n", type);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nrow);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", ncol);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nonz);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", &tmp);
-    if (tmp != 0)
-         printf("This is not an assembled matrix!\n");
-    if (*nrow != *ncol)
-       printf("Matrix is not square.\n");
-    dDumpLine(fp);
-    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
-    dallocateA(*ncol, *nonz, nzval, rowind, colptr);
-    /* Line 4: format statement */
-    dummy += fscanf(fp, "%16c", buf);
-    dParseIntFormat(buf, &colnum, &colsize);
-    dummy += fscanf(fp, "%16c", buf);
-    dParseIntFormat(buf, &rownum, &rowsize);
-    dummy += fscanf(fp, "%20c", buf);
-    dParseFloatFormat(buf, &valnum, &valsize);
-    dummy += fscanf(fp, "%20c", buf);
-    dDumpLine(fp);
-    /* Line 5: right-hand side */    
-    if ( rhscrd ) dDumpLine(fp); /* skip RHSFMT */
-#ifdef DEBUG
-    printf("%d rows, %d nonzeros\n", *nrow, *nonz);
-    printf("colnum %d, colsize %d\n", colnum, colsize);
-    printf("rownum %d, rowsize %d\n", rownum, rowsize);
-    printf("valnum %d, valsize %d\n", valnum, valsize);
-    dReadVector(fp, *ncol+1, *colptr, colnum, colsize);
-    dReadVector(fp, *nonz, *rowind, rownum, rowsize);
-    if ( numer_lines ) {
-        dReadValues(fp, *nonz, *nzval, valnum, valsize);
-    }
-    fclose(fp);
diff --git a/superlu/dsnode_bmod.c b/superlu/dsnode_bmod.c
deleted file mode 100644
index e2bac68c..00000000
--- a/superlu/dsnode_bmod.c
+++ /dev/null
@@ -1,114 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_ddefs.h"
-extern void dtrsv_();
-extern void dgemv_();
- * Performs numeric block updates within the relaxed snode. 
- */
-dsnode_bmod (
-           const int  jcol,      /* in */
-           const int  jsupno,    /* in */
-           const int  fsupc,     /* in */
-           double     *dense,    /* in */
-           double     *tempv,    /* working array */
-           GlobalLU_t *Glu,      /* modified */
-           SuperLUStat_t *stat   /* output */
-           )
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-    int            incx = 1, incy = 1;
-    double         alpha = -1.0, beta = 1.0;
-    int            luptr, nsupc, nsupr, nrow;
-    int            isub, irow, i, iptr; 
-    register int   ufirst, nextlu;
-    int            *lsub, *xlsub;
-    double         *lusup;
-    int            *xlusup;
-    flops_t *ops = stat->ops;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nextlu = xlusup[jcol];
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-       dense[irow] = 0;
-       ++nextlu;
-    }
-    xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
-    if ( fsupc < jcol ) {
-       luptr = xlusup[fsupc];
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nsupc = jcol - fsupc;   /* Excluding jcol */
-       ufirst = xlusup[jcol];  /* Points to the beginning of column
-                                  jcol in supernode L\U(jsupno). */
-       nrow = nsupr - nsupc;
-       ops[TRSV] += nsupc * (nsupc - 1);
-       ops[GEMV] += 2 * nrow * nsupc;
-#ifdef _CRAY
-       STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       dtrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       dgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       dlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       dmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
-                       &lusup[ufirst], &tempv[0] );
-        /* Scatter tempv[*] into lusup[*] */
-       iptr = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           lusup[iptr++] -= tempv[i];
-           tempv[i] = 0.0;
-       }
-    }
-    return 0;
diff --git a/superlu/dsnode_dfs.c b/superlu/dsnode_dfs.c
deleted file mode 100644
index d1c3c483..00000000
--- a/superlu/dsnode_dfs.c
+++ /dev/null
@@ -1,113 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_ddefs.h"
-dsnode_dfs (
-          const int  jcol,         /* in - start of the supernode */
-          const int  kcol,         /* in - end of the supernode */
-          const int  *asub,        /* in */
-          const int  *xa_begin,    /* in */
-          const int  *xa_end,      /* in */
-          int        *xprune,      /* out */
-          int        *marker,      /* modified */
-          GlobalLU_t *Glu          /* modified */
-          )
-/* Purpose
- * =======
- *    dsnode_dfs() - Determine the union of the row structures of those 
- *    columns within the relaxed snode.
- *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
- *    the portion outside the rectangular supernode must be zero.
- *
- * Return value
- * ============
- *     0   success;
- *    >0   number of bytes allocated when run out of memory.
- *
- */
-    register int i, k, ifrom, ito, nextl, new_next;
-    int          nsuper, krow, kmark, mem_error;
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    int          nzlmax;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-    nsuper = ++supno[jcol];    /* Next available supernode number */
-    nextl = xlsub[jcol];
-    for (i = jcol; i <= kcol; i++) {
-       /* For each nonzero in A[*,i] */
-       for (k = xa_begin[i]; k < xa_end[i]; k++) {     
-           krow = asub[k];
-           kmark = marker[krow];
-           if ( kmark != kcol ) { /* First time visit krow */
-               marker[krow] = kcol;
-               lsub[nextl++] = krow;
-               if ( nextl >= nzlmax ) {
-                   if ( mem_error = dLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                       return (mem_error);
-                   lsub = Glu->lsub;
-               }
-           }
-       }
-       supno[i] = nsuper;
-    }
-    /* Supernode > 1, then make a copy of the subscripts for pruning */
-    if ( jcol < kcol ) {
-       new_next = nextl + (nextl - xlsub[jcol]);
-       while ( new_next > nzlmax ) {
-           if ( mem_error = dLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-               return (mem_error);
-           lsub = Glu->lsub;
-       }
-       ito = nextl;
-       for (ifrom = xlsub[jcol]; ifrom < nextl; )
-           lsub[ito++] = lsub[ifrom++];        
-        for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
-       nextl = ito;
-    }
-    xsup[nsuper+1] = kcol + 1;
-    supno[kcol+1]  = nsuper;
-    xprune[kcol]   = nextl;
-    xlsub[kcol+1]  = nextl;
-    return 0;
diff --git a/superlu/dsp_blas2.c b/superlu/dsp_blas2.c
deleted file mode 100644
index 5ef8c228..00000000
--- a/superlu/dsp_blas2.c
+++ /dev/null
@@ -1,498 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:          dsp_blas2.c
- * Purpose:            Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-#include "slu_ddefs.h"
-extern void dtrsv_();
-extern void dgemv_();
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
- *   Purpose
- *   =======
- *
- *   sp_dtrsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.
- *                trans = 'C' or 'c'   A'*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *          
- *   L       - (input) SuperMatrix*
- *            The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *             The factor U from the factorization Pr*A*Pc=L*U.
- *             U has types: Stype = NC, Dtype = SLU_D, Mtype = TRU.
- *    
- *   x       - (input/output) double*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    double alpha = 1.0, beta = 1.0;
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    double *work;
-    flops_t solve_ops;
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
-              !lsame_(trans, "C")) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sp_dtrsv", &i);
-       return 0;
-    }
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    if ( !(work = doubleCalloc(L->nrow)) )
-       ABORT("Malloc fails for work in sp_dtrsv().");
-    if ( lsame_(trans, "N") ) {        /* Form x := inv(A)*x. */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L)*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               nrow = nsupr - nsupc;
-               solve_ops += nsupc * (nsupc - 1);
-               solve_ops += 2 * nrow * nsupc;
-               if ( nsupc == 1 ) {
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       x[irow] -= x[fsupc] * Lval[luptr];
-                   }
-               } else {
-#ifdef _CRAY
-                   STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-                   if (nsupr < nsupc) {
-                     fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d\n", nsupc, nsupr);
-                     return (*info = -10000000);
-                   }
-                   dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-                   dlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-                   dmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; ++i, ++iptr) {
-                       irow = L_SUB(iptr);
-                       x[irow] -= work[i];     /* Scatter */
-                       work[i] = 0.0;
-                   }
-               }
-           } /* for k ... */
-       } else {
-           /* Form x := inv(U)*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; k--) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               solve_ops += nsupc * (nsupc + 1);
-               if ( nsupc == 1 ) {
-                   x[fsupc] /= Lval[luptr];
-                   for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-                       irow = U_SUB(i);
-                       x[irow] -= x[fsupc] * Uval[i];
-                   }
-               } else {
-#ifdef _CRAY
-                   STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-                      &x[fsupc], &incx);
-                   if (nsupr < nsupc) {
-                     fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d\n", nsupc, nsupr);
-                     return (*info = -10000000);
-                   }
-                   dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                   dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-                   for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                       solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                       for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-                               i++) {
-                           irow = U_SUB(i);
-                           x[irow] -= x[jcol] * Uval[i];
-                       }
-                    }
-               }
-           } /* for k ... */
-       }
-    } else { /* Form x := inv(A')*x */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L')*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               solve_ops += 2 * (nsupr - nsupc) * nsupc;
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                       x[jcol] -= x[irow] * Lval[i];
-                       iptr++;
-                   }
-               }
-               if ( nsupc > 1 ) {
-                   solve_ops += nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   if (nsupr < nsupc) {
-                     fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d\n", nsupc, nsupr);
-                     return (*info = -10000000);
-                   }
-                   dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               }
-           }
-       } else {
-           /* Form x := inv(U')*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                       x[jcol] -= x[irow] * Uval[i];
-                   }
-               }
-               solve_ops += nsupc * (nsupc + 1);
-               if ( nsupc == 1 ) {
-                   x[fsupc] /= Lval[luptr];
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                   if (nsupr < nsupc) {
-                     fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d\n", nsupc, nsupr);
-                     return (*info = -10000000);
-                   }
-                   dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-               }
-           } /* for k ... */
-       }
-    }
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x, 
-        int incx, double beta, double *y, int incy)
-/*  Purpose   
-    =======   
-    sp_dgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-    Parameters   
-    ==========   
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-    ALPHA  - (input) double
-             On entry, ALPHA specifies the scalar alpha.   
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. 
-             In the future, more general A can be handled.
-    X      - (input) double*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-    BETA   - (input) double
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-    Y      - (output) double*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-    ==== Sparse Level 2 Blas routine.   
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    int info;
-    double temp;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)        info = 8;
-    if (info != 0) {
-       xerbla_("sp_dgemv ", &info);
-       return 0;
-    }
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
-       return 0;
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-       lenx = A->ncol;
-       leny = A->nrow;
-    } else {
-       lenx = A->nrow;
-       leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if (beta != 1.) {
-       if (incy == 1) {
-           if (beta == 0.)
-               for (i = 0; i < leny; ++i) y[i] = 0.;
-           else
-               for (i = 0; i < leny; ++i) y[i] = beta * y[i];
-       } else {
-           iy = ky;
-           if (beta == 0.)
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = 0.;
-                   iy += incy;
-               }
-           else
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = beta * y[iy];
-                   iy += incy;
-               }
-       }
-    }
-    if (alpha == 0.) return 0;
-    if ( notran ) {
-       /* Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (incy == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               if (x[jx] != 0.) {
-                   temp = alpha * x[jx];
-                   for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       y[irow] += temp * Aval[i];
-                   }
-               }
-               jx += incx;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    } else {
-       /* Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (incx == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               temp = 0.;
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   temp += Aval[i] * x[irow];
-               }
-               y[jy] += alpha * temp;
-               jy += incy;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    }
-    return 0;
-} /* sp_dgemv */
diff --git a/superlu/dsp_blas3.c b/superlu/dsp_blas3.c
deleted file mode 100644
index 00d200a6..00000000
--- a/superlu/dsp_blas3.c
+++ /dev/null
@@ -1,141 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:          sp_blas3.c
- * Purpose:            Sparse BLAS3, using some dense BLAS3 operations.
- */
-#include "slu_ddefs.h"
-sp_dgemm(char *transa, char *transb, int m, int n, int k, 
-         double alpha, SuperMatrix *A, double *b, int ldb, 
-         double beta, double *c, int ldc)
-/*  Purpose   
-    =======   
-    sp_d performs one of the matrix-matrix operations   
-       C := alpha*op( A )*op( B ) + beta*C,   
-    where  op( X ) is one of 
-       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
-    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
-    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
-    Parameters   
-    ==========   
-    TRANSA - (input) char*
-             On entry, TRANSA specifies the form of op( A ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSA = 'N' or 'n',  op( A ) = A.   
-                TRANSA = 'T' or 't',  op( A ) = A'.   
-                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
-             Unchanged on exit.   
-    TRANSB - (input) char*
-             On entry, TRANSB specifies the form of op( B ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSB = 'N' or 'n',  op( B ) = B.   
-                TRANSB = 'T' or 't',  op( B ) = B'.   
-                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
-             Unchanged on exit.   
-    M      - (input) int   
-             On entry,  M  specifies  the number of rows of the matrix 
-            op( A ) and of the matrix C.  M must be at least zero. 
-            Unchanged on exit.   
-    N      - (input) int
-             On entry,  N specifies the number of columns of the matrix 
-            op( B ) and the number of columns of the matrix C. N must be 
-            at least zero.
-            Unchanged on exit.   
-    K      - (input) int
-             On entry, K specifies the number of columns of the matrix 
-            op( A ) and the number of rows of the matrix op( B ). K must 
-            be at least  zero.   
-             Unchanged on exit.
-    ALPHA  - (input) double
-             On entry, ALPHA specifies the scalar alpha.   
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. 
-             In the future, more general A can be handled.
-    B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 
-             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
-             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
-             part of the array B must contain the matrix B, otherwise 
-             the leading n by k part of the array B must contain the 
-             matrix B.   
-             Unchanged on exit.   
-    LDB    - (input) int
-             On entry, LDB specifies the first dimension of B as declared 
-             in the calling (sub) program. LDB must be at least max( 1, n ).  
-             Unchanged on exit.   
-    BETA   - (input) double
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then C need not be set on input.   
-    C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).   
-             Before entry, the leading m by n part of the array C must 
-             contain the matrix C,  except when beta is zero, in which 
-             case C need not be set on entry.   
-             On exit, the array C is overwritten by the m by n matrix 
-            ( alpha*op( A )*B + beta*C ).   
-    LDC    - (input) int
-             On entry, LDC specifies the first dimension of C as declared 
-             in the calling (sub)program. LDC must be at least max(1,m).   
-             Unchanged on exit.   
-    ==== Sparse Level 3 Blas routine.   
-    int    incx = 1, incy = 1;
-    int    j;
-    for (j = 0; j < n; ++j) {
-       sp_dgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
-    }
-    return 0;    
diff --git a/superlu/dutil.c b/superlu/dutil.c
deleted file mode 100644
index bb4c5c4f..00000000
--- a/superlu/dutil.c
+++ /dev/null
@@ -1,479 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <math.h>
-#include "slu_ddefs.h"
-dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      double *nzval, int *rowind, int *colptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    NCformat *Astore;
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->rowind = rowind;
-    Astore->colptr = colptr;
-dCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      double *nzval, int *colind, int *rowptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    NRformat *Astore;
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->colind = colind;
-    Astore->rowptr = rowptr;
-/* Copy matrix A into matrix B. */
-dCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
-    NCformat *Astore, *Bstore;
-    int      ncol, nnz, i;
-    B->Stype = A->Stype;
-    B->Dtype = A->Dtype;
-    B->Mtype = A->Mtype;
-    B->nrow  = A->nrow;;
-    B->ncol  = ncol = A->ncol;
-    Astore   = (NCformat *) A->Store;
-    Bstore   = (NCformat *) B->Store;
-    Bstore->nnz = nnz = Astore->nnz;
-    for (i = 0; i < nnz; ++i)
-       ((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i];
-    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
-    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
-dCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx,
-                   Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    DNformat    *Xstore;
-    X->Stype = stype;
-    X->Dtype = dtype;
-    X->Mtype = mtype;
-    X->nrow = m;
-    X->ncol = n;
-    X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
-    Xstore = (DNformat *) X->Store;
-    Xstore->lda = ldx;
-    Xstore->nzval = (double *) x;
-dCopy_Dense_Matrix(int M, int N, double *X, int ldx,
-                       double *Y, int ldy)
- *
- *  Purpose
- *  =======
- *
- *  Copies a two-dimensional matrix X to another matrix Y.
- */
-    int    i, j;
-    for (j = 0; j < N; ++j)
-        for (i = 0; i < M; ++i)
-            Y[i + j*ldy] = X[i + j*ldx];
-dCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
-                       double *nzval, int *nzval_colptr, int *rowind,
-                       int *rowind_colptr, int *col_to_sup, int *sup_to_col,
-                       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    SCformat *Lstore;
-    L->Stype = stype;
-    L->Dtype = dtype;
-    L->Mtype = mtype;
-    L->nrow = m;
-    L->ncol = n;
-    L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
-    if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
-    Lstore = L->Store;
-    Lstore->nnz = nnz;
-    Lstore->nsuper = col_to_sup[n];
-    Lstore->nzval = nzval;
-    Lstore->nzval_colptr = nzval_colptr;
-    Lstore->rowind = rowind;
-    Lstore->rowind_colptr = rowind_colptr;
-    Lstore->col_to_sup = col_to_sup;
-    Lstore->sup_to_col = sup_to_col;
- * Convert a row compressed storage into a column compressed storage.
- */
-dCompRow_to_CompCol(int m, int n, int nnz, 
-                   double *a, int *colind, int *rowptr,
-                   double **at, int **rowind, int **colptr)
-    register int i, j, col, relpos;
-    int *marker;
-    /* Allocate storage for another copy of the matrix. */
-    *at = (double *) doubleMalloc(nnz);
-    *rowind = (int *) intMalloc(nnz);
-    *colptr = (int *) intMalloc(n+1);
-    marker = (int *) intCalloc(n);
-    /* Get counts of each column of A, and set up column pointers */
-    for (i = 0; i < m; ++i)
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
-    (*colptr)[0] = 0;
-    for (j = 0; j < n; ++j) {
-       (*colptr)[j+1] = (*colptr)[j] + marker[j];
-       marker[j] = (*colptr)[j];
-    }
-    /* Transfer the matrix into the compressed column storage. */
-    for (i = 0; i < m; ++i) {
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
-           col = colind[j];
-           relpos = marker[col];
-           (*rowind)[relpos] = i;
-           (*at)[relpos] = a[j];
-           ++marker[col];
-       }
-    }
-    SUPERLU_FREE(marker);
-dPrint_CompCol_Matrix(char *what, SuperMatrix *A)
-    NCformat     *Astore;
-    register int i,n;
-    double       *dp;
-    printf("\nCompCol matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (NCformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
-    printf("nzval: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
-    printf("\ncolptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
-    printf("\n");
-    fflush(stdout);
-dPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
-    SCformat     *Astore;
-    register int i, j, k, c, d, n, nsup;
-    double       *dp;
-    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
-    printf("\nSuperNode matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (SCformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    col_to_sup = Astore->col_to_sup;
-    sup_to_col = Astore->sup_to_col;
-    rowind_colptr = Astore->rowind_colptr;
-    rowind = Astore->rowind;
-    printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 
-          A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
-    printf("nzval:\n");
-    for (k = 0; k <= Astore->nsuper; ++k) {
-      c = sup_to_col[k];
-      nsup = sup_to_col[k+1] - c;
-      for (j = c; j < c + nsup; ++j) {
-       d = Astore->nzval_colptr[j];
-       for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-         printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
-       }
-      }
-    }
-#if 0
-    for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nnzval_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->rowind_colptr[n]; ++i) 
-        printf("%d  ", Astore->rowind[i]);
-    printf("\nrowind_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
-    printf("\ncol_to_sup: ");
-    for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
-    printf("\nsup_to_col: ");
-    for (i = 0; i <= Astore->nsuper+1; ++i) 
-        printf("%d  ", sup_to_col[i]);
-    printf("\n");
-    fflush(stdout);
-dPrint_Dense_Matrix(char *what, SuperMatrix *A)
-    DNformat     *Astore;
-    register int i, j, lda = Astore->lda;
-    double       *dp;
-    printf("\nDense matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    Astore = (DNformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
-    printf("\nnzval: ");
-    for (j = 0; j < A->ncol; ++j) {
-        for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i + j*lda]);
-        printf("\n");
-    }
-    printf("\n");
-    fflush(stdout);
- * Diagnostic print of column "jcol" in the U/L factor.
- */
-dprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
-    int     i, k, fsupc;
-    int     *xsup, *supno;
-    int     *xlsub, *lsub;
-    double  *lusup;
-    int     *xlusup;
-    double  *ucol;
-    int     *usub, *xusub;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    printf("%s", msg);
-    printf("col %d: pivrow %d, supno %d, xprune %d\n", 
-          jcol, pivrow, supno[jcol], xprune[jcol]);
-    printf("\tU-col:\n");
-    for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
-       printf("\t%d%10.4f\n", usub[i], ucol[i]);
-    printf("\tL-col in rectangular snode:\n");
-    fsupc = xsup[supno[jcol]]; /* first col of the snode */
-    i = xlsub[fsupc];
-    k = xlusup[jcol];
-    while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
-       printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
-       i++; k++;
-    }
-    fflush(stdout);
- * Check whether tempv[] == 0. This should be true before and after 
- * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
- */
-void dcheck_tempv(int n, double *tempv)
-    int i;
-    for (i = 0; i < n; i++) {
-       if (tempv[i] != 0.0) 
-       {
-           fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
-           ABORT("dcheck_tempv");
-       }
-    }
-dGenXtrue(int n, int nrhs, double *x, int ldx)
-    int  i, j;
-    for (j = 0; j < nrhs; ++j)
-       for (i = 0; i < n; ++i) {
-           x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/
-       }
- * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
- */
-dFillRHS(trans_t trans, int nrhs, double *x, int ldx,
-         SuperMatrix *A, SuperMatrix *B)
-    NCformat *Astore;
-    double   *Aval;
-    DNformat *Bstore;
-    double   *rhs;
-    double one = 1.0;
-    double zero = 0.0;
-    int      ldc;
-    char transc[1];
-    Astore = A->Store;
-    Aval   = (double *) Astore->nzval;
-    Bstore = B->Store;
-    rhs    = Bstore->nzval;
-    ldc    = Bstore->lda;
-    if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
-    else *(unsigned char *)transc = 'T';
-    sp_dgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
-            x, ldx, zero, rhs, ldc);
- * Fills a double precision array with a given value.
- */
-dfill(double *a, int alen, double dval)
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = dval;
- * Check the inf-norm of the error vector 
- */
-void dinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue)
-    DNformat *Xstore;
-    double err, xnorm;
-    double *Xmat, *soln_work;
-    int i, j;
-    Xstore = X->Store;
-    Xmat = Xstore->nzval;
-    for (j = 0; j < nrhs; j++) {
-      soln_work = &Xmat[j*Xstore->lda];
-      err = xnorm = 0.0;
-      for (i = 0; i < X->nrow; i++) {
-       err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
-       xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
-      }
-      err = err / xnorm;
-      printf("||X - Xtrue||/||X|| = %e\n", err);
-    }
-/* Print performance of the code. */
-dPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
-           double rpg, double rcond, double *ferr,
-           double *berr, char *equed, SuperLUStat_t *stat)
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *utime;
-    flops_t  *ops;
-    utime = stat->utime;
-    ops   = stat->ops;
-    if ( utime[FACT] != 0. )
-       printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-              ops[FACT]*1e-6/utime[FACT]);
-    printf("Identify relaxed snodes    = %8.2f\n", utime[RELAX]);
-    if ( utime[SOLVE] != 0. )
-       printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
-              ops[SOLVE]*1e-6/utime[SOLVE]);
-    Lstore = (SCformat *) L->Store;
-    Ustore = (NCformat *) U->Store;
-    printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
-    printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
-    printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-          mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
-          mem_usage->expansions);
-    printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
-    printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
-          utime[FACT], ops[FACT]*1e-6/utime[FACT],
-          utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
-          utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
-    printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
-    printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
-          rpg, rcond, ferr[0], berr[0], equed);
-int print_double_vec(char *what, int n, double *vec)
-    int i;
-    printf("%s: n %d\n", what, n);
-    for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
-    return 0;
diff --git a/superlu/dzsum1.c b/superlu/dzsum1.c
deleted file mode 100644
index d968326a..00000000
--- a/superlu/dzsum1.c
+++ /dev/null
@@ -1,102 +0,0 @@
-#include "slu_Cnames.h"
-#include "slu_dcomplex.h"
-double dzsum1_(int *n, doublecomplex *cx, int *incx)
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-/*! @file dzsum1.c
- * \brief Takes sum of the absolute values of a complex vector and returns a 
double precision result
- *
- * <pre>
- *     -- LAPACK auxiliary routine (version 2.0) --   
- *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
- *     Courant Institute, Argonne National Lab, and Rice University   
- *     October 31, 1992   
- * </pre>
- */
-    Purpose   
-    =======   
-    DZSUM1 takes the sum of the absolute values of a complex   
-    vector and returns a double precision result.   
-    Based on DZASUM from the Level 1 BLAS.   
-    The change is to use the 'genuine' absolute value.   
-    Contributed by Nick Higham for use with ZLACON.   
-    Arguments   
-    =========   
-    N       (input) INT   
-            The number of elements in the vector CX.   
-    CX      (input) COMPLEX*16 array, dimension (N)   
-            The vector whose elements will be summed.   
-    INCX    (input) INT   
-            The spacing between successive values of CX.  INCX > 0.   
-    ===================================================================== 
-    /* Builtin functions */
-    double z_abs(doublecomplex *);
-    /* Local variables */
-    int i, nincx;
-    double stemp;
-#define CX(I) cx[(I)-1]
-    stemp = 0.;
-    if (*n <= 0) {
-       return stemp;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-    nincx = *n * *incx;
-    for (i = 1; *incx < 0 ? i >= nincx : i <= nincx; i += *incx) {
-       /*        NEXT LINE MODIFIED. */
-       stemp += z_abs(&CX(i));
-/* L10: */
-    }
-    return stemp;
-    for (i = 1; i <= *n; ++i) {
-       /*        NEXT LINE MODIFIED. */
-       stemp += z_abs(&CX(i));
-/* L30: */
-    }
-    return stemp;
-    /*     End of DZSUM1 */
-} /* dzsum1_ */
diff --git a/superlu/f2c_lite.c b/superlu/f2c_lite.c
deleted file mode 100644
index 7dfb0d75..00000000
--- a/superlu/f2c_lite.c
+++ /dev/null
@@ -1,391 +0,0 @@
-  Copyright: 1992-2007 The University of Tennessee.  All rights reserved.
-  License:
-    LAPACK is a freely-available software package. It is available from
-    netlib via anonymous ftp and the World Wide Web. Thus, it can be
-    included in commercial software packages (and has been). We only ask
-    that proper credit be given to the authors.
-    Like all software, it is copyrighted. It is not trademarked, but we do
-    ask the following:
-    If you modify the source for these routines we ask that you change the
-    name of the routine and comment the changes made to the original.
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "BLAS_f2c.h"
-extern void s_wsfe() {;}
-extern void e_wsfe() {;}
-extern void do_fio() {;}
-#ifdef KR_headers
-extern double sqrt();
-double f__cabs(real, imag) double real, imag;
-#undef abs
-double f__cabs(double real, double imag)
-double temp;
-if(real < 0)
-       real = -real;
-if(imag < 0)
-       imag = -imag;
-if(imag > real){
-       temp = real;
-       real = imag;
-       imag = temp;
-if((imag+real) == real)
-       return((double)real);
-temp = imag/real;
-temp = real*sqrt(1.0 + temp*temp);  /*overflow!!*/
-#define log10e 0.43429448190325182765
-#ifdef KR_headers
-double log();
-double d_lg10(x) doublereal *x;
-#undef abs
-double d_lg10(doublereal *x)
-return( log10e * log(*x) );
-#ifdef KR_headers
-double d_sign(a,b) doublereal *a, *b;
-double d_sign(doublereal *a, doublereal *b)
-double x;
-x = (*a >= 0 ? *a : - *a);
-return( *b >= 0 ? x : -x);
-#ifdef KR_headers
-double floor();
-integer i_dnnt(x) doublereal *x;
-#undef abs
-integer i_dnnt(doublereal *x)
-return( (*x)>=0 ?
-       floor(*x + .5) : -floor(.5 - *x) );
-#ifdef KR_headers
-double pow();
-double pow_dd(ap, bp) doublereal *ap, *bp;
-#undef abs
-double pow_dd(doublereal *ap, doublereal *bp)
-return(pow(*ap, *bp) );
-/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the
- * target of a concatenation to appear on its right-hand side (contrary
- * to the Fortran 77 Standard, but in accordance with Fortran 90).
- */
-#undef abs
-#ifdef KR_headers
- extern char *F77_aloc();
- extern void free();
- extern void exit_();
- extern char *F77_aloc(ftnlen, char*);
-#endif /* NO_OVERWRITE */
-#ifdef KR_headers
-s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnlen rnp[], *np, ll;
-s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll)
-       ftnlen i, nc;
-       char *rp;
-       ftnlen n = *np;
-       ftnlen L, m;
-       char *lp0, *lp1;
-       lp0 = 0;
-       lp1 = lp;
-       L = ll;
-       i = 0;
-       while(i < n) {
-               rp = rpp[i];
-               m = rnp[i++];
-               if (rp >= lp1 || rp + m <= lp) {
-                       if ((L -= m) <= 0) {
-                               n = i;
-                               break;
-                               }
-                       lp1 += m;
-                       continue;
-                       }
-               lp0 = lp;
-               lp = lp1 = F77_aloc(L = ll, "s_cat");
-               break;
-               }
-       lp1 = lp;
-#endif /* NO_OVERWRITE */
-       for(i = 0 ; i < n ; ++i) {
-               nc = ll;
-               if(rnp[i] < nc)
-                       nc = rnp[i];
-               ll -= nc;
-               rp = rpp[i];
-               while(--nc >= 0)
-                       *lp++ = *rp++;
-               }
-       while(--ll >= 0)
-               *lp++ = ' ';
-       if (lp0) {
-               memmove(lp0, lp1, L);
-               free(lp1);
-               }
-       }
-/* compare two strings */
-#ifdef KR_headers
-integer s_cmp(a0, b0, la, lb) char *a0, *b0; ftnlen la, lb;
-integer s_cmp(char *a0, char *b0, ftnlen la, ftnlen lb)
-register unsigned char *a, *aend, *b, *bend;
-a = (unsigned char *)a0;
-b = (unsigned char *)b0;
-aend = a + la;
-bend = b + lb;
-if(la <= lb)
-       {
-       while(a < aend)
-               if(*a != *b)
-                       return( *a - *b );
-               else
-                       { ++a; ++b; }
-       while(b < bend)
-               if(*b != ' ')
-                       return( ' ' - *b );
-               else    ++b;
-       }
-       {
-       while(b < bend)
-               if(*a == *b)
-                       { ++a; ++b; }
-               else
-                       return( *a - *b );
-       while(a < aend)
-               if(*a != ' ')
-                       return(*a - ' ');
-               else    ++a;
-       }
-/* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the
- * target of an assignment to appear on its right-hand side (contrary
- * to the Fortran 77 Standard, but in accordance with Fortran 90),
- * as in  a(2:5) = a(4:7) .
- */
-/* assign strings:  a = b */
-#ifdef KR_headers
-VOID s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb;
-void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb)
-       register char *aend, *bend;
-       aend = a + la;
-       if(la <= lb)
-               if (a <= b || a >= b + la)
-                       while(a < aend)
-                               *a++ = *b++;
-               else
-                       for(b += la; a < aend; )
-                               *--aend = *--b;
-       else {
-               bend = b + lb;
-               if (a <= b || a >= bend)
-                       while(b < bend)
-                               *a++ = *b++;
-               else {
-                       a += lb;
-                       while(b < bend)
-                               *--a = *--bend;
-                       a += lb;
-                       }
-               while(a < aend)
-                       *a++ = ' ';
-               }
-       }
-#ifdef KR_headers
-double sqrt(), f__cabs();
-VOID z_sqrt(r, z) doublecomplex *r, *z;
-#undef abs
-extern double f__cabs(double, double);
-void z_sqrt(doublecomplex *r, doublecomplex *z)
-double mag;
-if( (mag = f__cabs(z->r, z->i)) == 0.)
-       r->r = r->i = 0.;
-else if(z->r > 0)
-       {
-       r->r = sqrt(0.5 * (mag + z->r) );
-       r->i = z->i / r->r / 2;
-       }
-       {
-       r->i = sqrt(0.5 * (mag - z->r) );
-       if(z->i < 0)
-               r->i = - r->i;
-       r->r = z->i / r->i / 2;
-       }
-#ifdef __cplusplus
-extern "C" {
-#ifdef KR_headers
-integer pow_ii(ap, bp) integer *ap, *bp;
-integer pow_ii(integer *ap, integer *bp)
-       integer pow, x, n;
-       unsigned long u;
-       x = *ap;
-       n = *bp;
-       if (n <= 0) {
-               if (n == 0 || x == 1)
-                       return 1;
-               if (x != -1)
-                       return x == 0 ? 1/x : 0;
-               n = -n;
-               }
-       u = n;
-       for(pow = 1; ; )
-               {
-               if(u & 01)
-                       pow *= x;
-               if(u >>= 1)
-                       x *= x;
-               else
-                       break;
-               }
-       return(pow);
-       }
-#ifdef __cplusplus
-#ifdef KR_headers
-extern void f_exit();
-VOID s_stop(s, n) char *s; ftnlen n;
-#undef abs
-#undef min
-#undef max
-#ifdef __cplusplus
-extern "C" {
-#ifdef __cplusplus
-extern "C" {
-void f_exit(void);
-int s_stop(char *s, ftnlen n)
-int i;
-if(n > 0)
-       {
-       fprintf(stderr, "STOP ");
-       for(i = 0; i<n ; ++i)
-               putc(*s++, stderr);
-       fprintf(stderr, " statement executed\n");
-       }
-#ifdef NO_ONEXIT
-/* We cannot avoid (useless) compiler diagnostics here:                */
-/* some compilers complain if there is no return statement,    */
-/* and others complain that this one cannot be reached.                */
-return 0; /* NOT REACHED */
-#ifdef __cplusplus
-#ifdef __cplusplus
diff --git a/superlu/get_perm_c.c b/superlu/get_perm_c.c
deleted file mode 100644
index dd6558ac..00000000
--- a/superlu/get_perm_c.c
+++ /dev/null
@@ -1,472 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_ddefs.h"
-#include "colamd.h"
-extern int  genmmd_(int *, int *, int *, int *, int *, int *, int *, 
-                   int *, int *, int *, int *, int *);
-          const int m,  /* number of rows in matrix A. */
-          const int n,  /* number of columns in matrix A. */
-          const int nnz,/* number of nonzeros in matrix A. */
-          int *colptr,  /* column pointer of size n+1 for matrix A. */
-          int *rowind,  /* row indices of size nz for matrix A. */
-          int *perm_c   /* out - the column permutation vector. */
-          )
-    int Alen, *A, i, info, *p;
-    double knobs[COLAMD_KNOBS];
-    int stats[COLAMD_STATS];
-    Alen = colamd_recommended(nnz, m, n);
-    colamd_set_defaults(knobs);
-    if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) )
-        ABORT("Malloc fails for A[]");
-    if (!(p = (int *) SUPERLU_MALLOC((n+1) * sizeof(int))) )
-        ABORT("Malloc fails for p[]");
-    for (i = 0; i <= n; ++i) p[i] = colptr[i];
-    for (i = 0; i < nnz; ++i) A[i] = rowind[i];
-    info = colamd(m, n, Alen, A, p, knobs, stats);
-    if ( info == FALSE ) ABORT("COLAMD failed");
-    for (i = 0; i < n; ++i) perm_c[p[i]] = i;
-       const int m,      /* number of rows in matrix A. */
-       const int n,      /* number of columns in matrix A. */
-       const int nz,     /* number of nonzeros in matrix A */
-       int *colptr,      /* column pointer of size n+1 for matrix A. */
-       int *rowind,      /* row indices of size nz for matrix A. */
-       int *atanz,       /* out - on exit, returns the actual number of
-                            nonzeros in matrix A'*A. */
-       int **ata_colptr, /* out - size n+1 */
-       int **ata_rowind  /* out - size *atanz */
-       )
- * Purpose
- * =======
- *
- * Form the structure of A'*A. A is an m-by-n matrix in column oriented
- * format represented by (colptr, rowind). The output A'*A is in column
- * oriented format (symmetrically, also row oriented), represented by
- * (ata_colptr, ata_rowind).
- *
- * This routine is modified from GETATA routine by Tim Davis.
- * The complexity of this algorithm is: SUM_{i=1,m} r(i)^2,
- * i.e., the sum of the square of the row counts.
- *
- * Questions
- * =========
- *     o  Do I need to withhold the *dense* rows?
- *     o  How do I know the number of nonzeros in A'*A?
- * 
- */
-    register int i, j, k, col, num_nz, ti, trow;
-    int *marker, *b_colptr, *b_rowind;
-    int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
-    if ( !(marker = (int*) SUPERLU_MALLOC((SUPERLU_MAX(m,n)+1)*sizeof(int))) )
-       ABORT("SUPERLU_MALLOC fails for marker[]");
-    if ( !(t_colptr = (int*) SUPERLU_MALLOC((m+1) * sizeof(int))) )
-       ABORT("SUPERLU_MALLOC t_colptr[]");
-    if ( !(t_rowind = (int*) SUPERLU_MALLOC(nz * sizeof(int))) )
-       ABORT("SUPERLU_MALLOC fails for t_rowind[]");
-    /* Get counts of each column of T, and set up column pointers */
-    for (i = 0; i < m; ++i) marker[i] = 0;
-    for (j = 0; j < n; ++j) {
-       for (i = colptr[j]; i < colptr[j+1]; ++i)
-           ++marker[rowind[i]];
-    }
-    t_colptr[0] = 0;
-    for (i = 0; i < m; ++i) {
-       t_colptr[i+1] = t_colptr[i] + marker[i];
-       marker[i] = t_colptr[i];
-    }
-    /* Transpose the matrix from A to T */
-    for (j = 0; j < n; ++j)
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           col = rowind[i];
-           t_rowind[marker[col]] = j;
-           ++marker[col];
-       }
-    /* ----------------------------------------------------------------
-       compute B = T * A, where column j of B is:
-       Struct (B_*j) =    UNION   ( Struct (T_*k) )
-                        A_kj != 0
-       do not include the diagonal entry
-       ( Partition A as: A = (A_*1, ..., A_*n)
-         Then B = T * A = (T * A_*1, ..., T * A_*n), where
-         T * A_*j = (T_*1, ..., T_*m) * A_*j.  )
-       ---------------------------------------------------------------- */
-    /* Zero the diagonal flag */
-    for (i = 0; i < n; ++i) marker[i] = -1;
-    /* First pass determines number of nonzeros in B */
-    num_nz = 0;
-    for (j = 0; j < n; ++j) {
-       /* Flag the diagonal so it's not included in the B matrix */
-       marker[j] = j;
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           /* A_kj is nonzero, add pattern of column T_*k to B_*j */
-           k = rowind[i];
-           for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
-               trow = t_rowind[ti];
-               if ( marker[trow] != j ) {
-                   marker[trow] = j;
-                   num_nz++;
-               }
-           }
-       }
-    }
-    *atanz = num_nz;
-    /* Allocate storage for A'*A */
-    if ( !(*ata_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for ata_colptr[]");
-    if ( *atanz ) {
-       if ( !(*ata_rowind = (int*) SUPERLU_MALLOC( *atanz * sizeof(int)) ) )
-           ABORT("SUPERLU_MALLOC fails for ata_rowind[]");
-    }
-    b_colptr = *ata_colptr; /* aliasing */
-    b_rowind = *ata_rowind;
-    /* Zero the diagonal flag */
-    for (i = 0; i < n; ++i) marker[i] = -1;
-    /* Compute each column of B, one at a time */
-    num_nz = 0;
-    for (j = 0; j < n; ++j) {
-       b_colptr[j] = num_nz;
-       /* Flag the diagonal so it's not included in the B matrix */
-       marker[j] = j;
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           /* A_kj is nonzero, add pattern of column T_*k to B_*j */
-           k = rowind[i];
-           for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
-               trow = t_rowind[ti];
-               if ( marker[trow] != j ) {
-                   marker[trow] = j;
-                   b_rowind[num_nz++] = trow;
-               }
-           }
-       }
-    }
-    b_colptr[n] = num_nz;
-    SUPERLU_FREE(marker);
-    SUPERLU_FREE(t_colptr);
-    SUPERLU_FREE(t_rowind);
-         const int n,      /* number of columns in matrix A. */
-         const int nz,     /* number of nonzeros in matrix A */
-         int *colptr,      /* column pointer of size n+1 for matrix A. */
-         int *rowind,      /* row indices of size nz for matrix A. */
-         int *bnz,         /* out - on exit, returns the actual number of
-                               nonzeros in matrix A'*A. */
-         int **b_colptr,   /* out - size n+1 */
-         int **b_rowind    /* out - size *bnz */
-         )
- * Purpose
- * =======
- *
- * Form the structure of A'+A. A is an n-by-n matrix in column oriented
- * format represented by (colptr, rowind). The output A'+A is in column
- * oriented format (symmetrically, also row oriented), represented by
- * (b_colptr, b_rowind).
- *
- */
-    register int i, j, k, col, num_nz;
-    int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
-    int *marker;
-    if ( !(marker = (int*) SUPERLU_MALLOC( n * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for marker[]");
-    if ( !(t_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for t_colptr[]");
-    if ( !(t_rowind = (int*) SUPERLU_MALLOC( nz * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails t_rowind[]");
-    /* Get counts of each column of T, and set up column pointers */
-    for (i = 0; i < n; ++i) marker[i] = 0;
-    for (j = 0; j < n; ++j) {
-       for (i = colptr[j]; i < colptr[j+1]; ++i)
-           ++marker[rowind[i]];
-    }
-    t_colptr[0] = 0;
-    for (i = 0; i < n; ++i) {
-       t_colptr[i+1] = t_colptr[i] + marker[i];
-       marker[i] = t_colptr[i];
-    }
-    /* Transpose the matrix from A to T */
-    for (j = 0; j < n; ++j)
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           col = rowind[i];
-           t_rowind[marker[col]] = j;
-           ++marker[col];
-       }
-    /* ----------------------------------------------------------------
-       compute B = A + T, where column j of B is:
-       Struct (B_*j) = Struct (A_*k) UNION Struct (T_*k)
-       do not include the diagonal entry
-       ---------------------------------------------------------------- */
-    /* Zero the diagonal flag */
-    for (i = 0; i < n; ++i) marker[i] = -1;
-    /* First pass determines number of nonzeros in B */
-    num_nz = 0;
-    for (j = 0; j < n; ++j) {
-       /* Flag the diagonal so it's not included in the B matrix */
-       marker[j] = j;
-       /* Add pattern of column A_*k to B_*j */
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           k = rowind[i];
-           if ( marker[k] != j ) {
-               marker[k] = j;
-               ++num_nz;
-           }
-       }
-       /* Add pattern of column T_*k to B_*j */
-       for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
-           k = t_rowind[i];
-           if ( marker[k] != j ) {
-               marker[k] = j;
-               ++num_nz;
-           }
-       }
-    }
-    *bnz = num_nz;
-    /* Allocate storage for A+A' */
-    if ( !(*b_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for b_colptr[]");
-    if ( *bnz) {
-      if ( !(*b_rowind = (int*) SUPERLU_MALLOC( *bnz * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for b_rowind[]");
-    }
-    /* Zero the diagonal flag */
-    for (i = 0; i < n; ++i) marker[i] = -1;
-    /* Compute each column of B, one at a time */
-    num_nz = 0;
-    for (j = 0; j < n; ++j) {
-       (*b_colptr)[j] = num_nz;
-       /* Flag the diagonal so it's not included in the B matrix */
-       marker[j] = j;
-       /* Add pattern of column A_*k to B_*j */
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           k = rowind[i];
-           if ( marker[k] != j ) {
-               marker[k] = j;
-               (*b_rowind)[num_nz++] = k;
-           }
-       }
-       /* Add pattern of column T_*k to B_*j */
-       for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
-           k = t_rowind[i];
-           if ( marker[k] != j ) {
-               marker[k] = j;
-               (*b_rowind)[num_nz++] = k;
-           }
-       }
-    }
-    (*b_colptr)[n] = num_nz;
-    SUPERLU_FREE(marker);
-    SUPERLU_FREE(t_colptr);
-    SUPERLU_FREE(t_rowind);
-get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
- * Purpose
- * =======
- *
- * GET_PERM_C obtains a permutation matrix Pc, by applying the multiple
- * minimum degree ordering code by Joseph Liu to matrix A'*A or A+A'.
- * or using approximate minimum degree column ordering by Davis et. al.
- * The LU factorization of A*Pc tends to have less fill than the LU 
- * factorization of A.
- *
- * Arguments
- * =========
- *
- * ispec   (input) int
- *         Specifies the type of column ordering to reduce fill:
- *         = 1: minimum degree on the structure of A^T * A
- *         = 2: minimum degree on the structure of A^T + A
- *         = 3: approximate minimum degree for unsymmetric matrices
- *         If ispec == 0, the natural ordering (i.e., Pc = I) is returned.
- * 
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A 
- *         can be: Stype = NC; Dtype = _D; Mtype = GE. In the future,
- *         more general A can be handled.
- *
- * perm_c  (output) int*
- *        Column permutation vector of size A->ncol, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- */
-    NCformat *Astore = A->Store;
-    int m, n, bnz = 0, *b_colptr, i;
-    int delta, maxint, nofsub, *invp;
-    int *b_rowind, *dhead, *qsize, *llist, *marker;
-    double t, SuperLU_timer_();
-    m = A->nrow;
-    n = A->ncol;
-    t = SuperLU_timer_();
-    switch ( ispec ) {
-        case 0: /* Natural ordering */
-             for (i = 0; i < n; ++i) perm_c[i] = i;
-#if ( PRNTlevel>=1 )
-             printf("Use natural column ordering.\n");
-             return;
-        case 1: /* Minimum degree ordering on A'*A */
-             getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
-                    &bnz, &b_colptr, &b_rowind);
-#if ( PRNTlevel>=1 )
-             printf("Use minimum degree ordering on A'*A.\n");
-             t = SuperLU_timer_() - t;
-             /*printf("Form A'*A time = %8.3f\n", t);*/
-             break;
-        case 2: /* Minimum degree ordering on A'+A */
-             if ( m != n ) ABORT("Matrix is not square");
-             at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
-                       &bnz, &b_colptr, &b_rowind);
-#if ( PRNTlevel>=1 )
-             printf("Use minimum degree ordering on A'+A.\n");
-             t = SuperLU_timer_() - t;
-             /*printf("Form A'+A time = %8.3f\n", t);*/
-             break;
-        case 3: /* Approximate minimum degree column ordering. */
-             get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
-                        perm_c);
-#if ( PRNTlevel>=1 )
-             printf(".. Use approximate minimum degree column ordering.\n");
-             return; 
-        default:
-             ABORT("Invalid ISPEC");
-    }
-    if ( bnz != 0 ) {
-       t = SuperLU_timer_();
-       /* Initialize and allocate storage for GENMMD. */
-       delta = 1; /* DELTA is a parameter to allow the choice of nodes
-                     whose degree <= min-degree + DELTA. */
-       maxint = 2147483647; /* 2**31 - 1 */
-       invp = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
-       if ( !invp ) ABORT("SUPERLU_MALLOC fails for invp.");
-       dhead = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
-       if ( !dhead ) ABORT("SUPERLU_MALLOC fails for dhead.");
-       qsize = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
-       if ( !qsize ) ABORT("SUPERLU_MALLOC fails for qsize.");
-       llist = (int *) SUPERLU_MALLOC(n*sizeof(int));
-       if ( !llist ) ABORT("SUPERLU_MALLOC fails for llist.");
-       marker = (int *) SUPERLU_MALLOC(n*sizeof(int));
-       if ( !marker ) ABORT("SUPERLU_MALLOC fails for marker.");
-       /* Transform adjacency list into 1-based indexing required by GENMMD.*/
-       for (i = 0; i <= n; ++i) ++b_colptr[i];
-       for (i = 0; i < bnz; ++i) ++b_rowind[i];
-       genmmd_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead, 
-               qsize, llist, marker, &maxint, &nofsub);
-       /* Transform perm_c into 0-based indexing. */
-       for (i = 0; i < n; ++i) --perm_c[i];
-       SUPERLU_FREE(invp);
-       SUPERLU_FREE(dhead);
-       SUPERLU_FREE(qsize);
-       SUPERLU_FREE(llist);
-       SUPERLU_FREE(marker);
-       SUPERLU_FREE(b_rowind);
-       t = SuperLU_timer_() - t;
-       /*  printf("call GENMMD time = %8.3f\n", t);*/
-    } else { /* Empty adjacency structure */
-       for (i = 0; i < n; ++i) perm_c[i] = i;
-    }
-    SUPERLU_FREE(b_colptr);
diff --git a/superlu/heap_relax_snode.c b/superlu/heap_relax_snode.c
deleted file mode 100644
index 80ed279b..00000000
--- a/superlu/heap_relax_snode.c
+++ /dev/null
@@ -1,113 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_ddefs.h"
-heap_relax_snode (
-            const     int n,
-            int       *et,           /* column elimination tree */
-            const int relax_columns, /* max no of columns allowed in a
-                                        relaxed snode */
-            int       *descendants,  /* no of descendants of each node
-                                        in the etree */
-            int       *relax_end     /* last column in a supernode */
-            )
- * Purpose
- * =======
- *    relax_snode() - Identify the initial relaxed supernodes, assuming that 
- *    the matrix has been reordered according to the postorder of the etree.
- *
- */ 
-    register int i, j, k, l, parent;
-    register int snode_start;  /* beginning of a snode */
-    int *et_save, *post, *inv_post, *iwork;
-    int nsuper_et = 0, nsuper_et_post = 0;
-    /* The etree may not be postordered, but is heap ordered. */
-    iwork = (int*) intMalloc(3*n+2); 
-    if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
-    inv_post = iwork + n+1;
-    et_save = inv_post + n+1;
-    /* Post order etree */
-    post = (int *) TreePostorder(n, et);
-    for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
-    /* Renumber etree in postorder */
-    for (i = 0; i < n; ++i) {
-        iwork[post[i]] = post[et[i]];
-       et_save[i] = et[i]; /* Save the original etree */
-    }
-    for (i = 0; i < n; ++i) et[i] = iwork[i];
-    /* Compute the number of descendants of each node in the etree */
-    ifill (relax_end, n, EMPTY);
-    for (j = 0; j < n; j++) descendants[j] = 0;
-    for (j = 0; j < n; j++) {
-       parent = et[j];
-       if ( parent != n )  /* not the dummy root */
-           descendants[parent] += descendants[j] + 1;
-    }
-    /* Identify the relaxed supernodes by postorder traversal of the etree. */
-    for (j = 0; j < n; ) { 
-       parent = et[j];
-        snode_start = j;
-       while ( parent != n && descendants[parent] < relax_columns ) {
-           j = parent;
-           parent = et[j];
-       }
-       /* Found a supernode in postordered etree; j is the last column. */
-       ++nsuper_et_post;
-       k = n;
-       for (i = snode_start; i <= j; ++i)
-           k = SUPERLU_MIN(k, inv_post[i]);
-       l = inv_post[j];
-       if ( (l - k) == (j - snode_start) ) {
-           /* It's also a supernode in the original etree */
-           relax_end[k] = l;           /* Last column is recorded */
-           ++nsuper_et;
-       } else {
-           for (i = snode_start; i <= j; ++i) {
-               l = inv_post[i];
-               if ( descendants[i] == 0 ) relax_end[l] = l;
-           }
-       }
-       j++;
-       /* Search for a new leaf */
-       while ( descendants[j] != 0 && j < n ) j++;
-    }
-#if ( PRNTlevel>=1 )
-    printf(".. heap_snode_relax:\n"
-          "\tNo of relaxed snodes in postordered etree:\t%d\n"
-          "\tNo of relaxed snodes in original etree:\t%d\n",
-          nsuper_et_post, nsuper_et);
-    /* Recover the original etree */
-    for (i = 0; i < n; ++i) et[i] = et_save[i];
-    SUPERLU_FREE(post);
-    SUPERLU_FREE(iwork);
diff --git a/superlu/icmax1.c b/superlu/icmax1.c
deleted file mode 100644
index d5d15411..00000000
--- a/superlu/icmax1.c
+++ /dev/null
@@ -1,124 +0,0 @@
-#include <math.h>
-#include "slu_scomplex.h"
-#include "slu_Cnames.h"
-int icmax1_(int *n, complex *cx, int *incx)
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-/*! @file icmax1.c
- * \brief Finds the index of the element whose real part has maximum absolute 
- *
- * <pre>
- *     -- LAPACK auxiliary routine (version 2.0) --   
- *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
- *     Courant Institute, Argonne National Lab, and Rice University   
- *     October 31, 1992   
- * </pre>
- */
-    Purpose   
-    =======   
-    ICMAX1 finds the index of the element whose real part has maximum   
-    absolute value.   
-    Based on ICAMAX from Level 1 BLAS.   
-    The change is to use the 'genuine' absolute value.   
-    Contributed by Nick Higham for use with CLACON.   
-    Arguments   
-    =========   
-    N       (input) INT   
-            The number of elements in the vector CX.   
-    CX      (input) COMPLEX array, dimension (N)   
-            The vector whose elements will be summed.   
-    INCX    (input) INT   
-            The spacing between successive values of CX.  INCX >= 1.   
-   ===================================================================== 
-   Parameter adjustments   
-       Function Body */
-    /* System generated locals */
-    int ret_val, i__1, i__2;
-    float r__1;
-    /* Local variables */
-    static float smax;
-    static int i, ix;
-#define CX(I) cx[(I)-1]
-    ret_val = 0;
-    if (*n < 1) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L30;
-    }
-    ix = 1;
-    smax = (r__1 = CX(1).r, fabs(r__1));
-    ix += *incx;
-    i__1 = *n;
-    for (i = 2; i <= *n; ++i) {
-       i__2 = ix;
-       if ((r__1 = CX(ix).r, fabs(r__1)) <= smax) {
-           goto L10;
-       }
-       ret_val = i;
-       i__2 = ix;
-       smax = (r__1 = CX(ix).r, fabs(r__1));
-       ix += *incx;
-/* L20: */
-    }
-    return ret_val;
-    smax = (r__1 = CX(1).r, fabs(r__1));
-    i__1 = *n;
-    for (i = 2; i <= *n; ++i) {
-       i__2 = i;
-       if ((r__1 = CX(i).r, fabs(r__1)) <= smax) {
-           goto L40;
-       }
-       ret_val = i;
-       i__2 = i;
-       smax = (r__1 = CX(i).r, fabs(r__1));
-       ;
-    }
-    return ret_val;
-/*     End of ICMAX1 */
-} /* icmax1_ */
diff --git a/superlu/izmax1.c b/superlu/izmax1.c
deleted file mode 100644
index 75b3279e..00000000
--- a/superlu/izmax1.c
+++ /dev/null
@@ -1,117 +0,0 @@
-#include <math.h>
-#include "slu_Cnames.h"
-#include "slu_dcomplex.h"
-izmax1_(int *n, doublecomplex *cx, int *incx)
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-/*! @file izmax1.c
- * \brief Finds the index of the element whose real part has maximum absolute 
- *
- * <pre>
- *     -- LAPACK auxiliary routine (version 2.0) --   
- *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
- *     Courant Institute, Argonne National Lab, and Rice University   
- *     October 31, 1992   
- * </pre>
- */
-    Purpose   
-    =======   
-    IZMAX1 finds the index of the element whose real part has maximum   
-    absolute value.   
-    Based on IZAMAX from Level 1 BLAS.   
-    The change is to use the 'genuine' absolute value.   
-    Contributed by Nick Higham for use with ZLACON.   
-    Arguments   
-    =========   
-    N       (input) INT   
-            The number of elements in the vector CX.   
-    CX      (input) COMPLEX*16 array, dimension (N)   
-            The vector whose elements will be summed.   
-    INCX    (input) INT   
-            The spacing between successive values of CX.  INCX >= 1.   
-   ===================================================================== 
-    /* System generated locals */
-    int ret_val, i__1, i__2;
-    double d__1;
-    /* Local variables */
-    double smax;
-    int i, ix;
-#define CX(I) cx[(I)-1]
-    ret_val = 0;
-    if (*n < 1) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L30;
-    }
-    ix = 1;
-    smax = (d__1 = CX(1).r, fabs(d__1));
-    ix += *incx;
-    i__1 = *n;
-    for (i = 2; i <= *n; ++i) {
-       i__2 = ix;
-       if ((d__1 = CX(ix).r, fabs(d__1)) <= smax) {
-           goto L10;
-       }
-       ret_val = i;
-       i__2 = ix;
-       smax = (d__1 = CX(ix).r, fabs(d__1));
-       ix += *incx;
-/* L20: */
-    }
-    return ret_val;
-    smax = (d__1 = CX(1).r, fabs(d__1));
-    i__1 = *n;
-    for (i = 2; i <= *n; ++i) {
-       i__2 = i;
-       if ((d__1 = CX(i).r, fabs(d__1)) <= smax) {
-           goto L40;
-       }
-       ret_val = i;
-       i__2 = i;
-       smax = (d__1 = CX(i).r, fabs(d__1));
-       ;
-    }
-    return ret_val;
-/*     End of IZMAX1 */
-} /* izmax1_ */
diff --git a/superlu/lsame.c b/superlu/lsame.c
deleted file mode 100644
index e235b88d..00000000
--- a/superlu/lsame.c
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "slu_Cnames.h"
-int lsame_(char *ca, char *cb)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Copyright (c) 1992-2013 The University of Tennessee and The University
-                        of Tennessee Research Foundation.  All rights
-                        reserved.
-       Copyright (c) 2000-2013 The University of California Berkeley. All
-                        rights reserved.
-       Copyright (c) 2006-2013 The University of Colorado Denver.  All rights
-                        reserved.
-       Redistribution and use in source and binary forms, with or without
-       modification, are permitted provided that the following conditions are
-       met:
-       - Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       - Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer listed
-         in this license in the documentation and/or other materials
-         provided with the distribution.
-       - Neither the name of the copyright holders nor the names of its
-         contributors may be used to endorse or promote products derived from
-         this software without specific prior written permission.
-       The copyright holders provide no reassurances that the source code
-       provided does not infringe any patent, copyright, or any other
-       intellectual property rights of third parties.  The copyright holders
-       disclaim any liability to any recipient for claims brought against
-       recipient by any third party for infringement of that parties
-       intellectual property rights.
-    Purpose   
-    =======   
-    LSAME returns .TRUE. if CA is the same letter as CB regardless of case.   
-    Arguments   
-    =========   
-    CA      (input) CHARACTER*1   
-    CB      (input) CHARACTER*1   
-            CA and CB specify the single characters to be compared.   
-   ===================================================================== 
-    /* System generated locals */
-    int ret_val;
-    /* Local variables */
-    int inta, intb, zcode;
-    ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
-    if (ret_val) {
-       return ret_val;
-    }
-    /* Now test for equivalence if both characters are alphabetic. */
-    zcode = 'Z';
-    /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime   
-       machines, on which ICHAR returns a value with bit 8 set.   
-       ICHAR('A') on Prime machines returns 193 which is the same as   
-       ICHAR('A') on an EBCDIC machine. */
-    inta = *(unsigned char *)ca;
-    intb = *(unsigned char *)cb;
-    if (zcode == 90 || zcode == 122) {
-       /* ASCII is assumed - ZCODE is the ASCII code of either lower or   
-          upper case 'Z'. */
-       if (inta >= 97 && inta <= 122) inta += -32;
-       if (intb >= 97 && intb <= 122) intb += -32;
-    } else if (zcode == 233 || zcode == 169) {
-       /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or   
-          upper case 'Z'. */
-       if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta 
-               >= 162 && inta <= 169)
-           inta += 64;
-       if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb 
-               >= 162 && intb <= 169)
-           intb += 64;
-    } else if (zcode == 218 || zcode == 250) {
-       /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code   
-          plus 128 of either lower or upper case 'Z'. */
-       if (inta >= 225 && inta <= 250) inta += -32;
-       if (intb >= 225 && intb <= 250) intb += -32;
-    }
-    ret_val = inta == intb;
-    return ret_val;
-} /* lsame_ */
diff --git a/superlu/memory.c b/superlu/memory.c
deleted file mode 100644
index c614e14d..00000000
--- a/superlu/memory.c
+++ /dev/null
@@ -1,230 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-/** Precision-independent memory-related routines.
-    (Shared by [sdcz]memory.c) **/
-#include "slu_ddefs.h"
-#if ( DEBUGlevel>=1 )           /* Debug malloc/free. */
-int superlu_malloc_total = 0;
-#define PAD_FACTOR  2
-#define DWORD  (sizeof(double)) /* Be sure it's no smaller than double. */
-/* size_t is usually defined as 'unsigned long' */
-void *superlu_malloc(size_t size)
-    char *buf;
-    buf = (char *) malloc(size + DWORD);
-    if ( !buf ) {
-       printf("superlu_malloc fails: malloc_total %.0f MB, size %ld\n",
-              superlu_malloc_total*1e-6, size);
-       ABORT("superlu_malloc: out of memory");
-    }
-    ((int_t *) buf)[0] = size;
-#if 0
-    superlu_malloc_total += size + DWORD;
-    superlu_malloc_total += size;
-    return (void *) (buf + DWORD);
-void superlu_free(void *addr)
-    char *p = ((char *) addr) - DWORD;
-    if ( !addr )
-       ABORT("superlu_free: tried to free NULL pointer");
-    if ( !p )
-       ABORT("superlu_free: tried to free NULL+DWORD pointer");
-    { 
-       int_t n = ((int_t *) p)[0];
-       if ( !n )
-           ABORT("superlu_free: tried to free a freed pointer");
-       *((int_t *) p) = 0; /* Set to zero to detect duplicate free's. */
-#if 0  
-       superlu_malloc_total -= (n + DWORD);
-       superlu_malloc_total -= n;
-       if ( superlu_malloc_total < 0 )
-           ABORT("superlu_malloc_total went negative!");
-       /*free (addr);*/
-       free (p);
-    }
-#else   /* production mode */
-void *superlu_malloc(size_t size)
-    void *buf;
-    buf = (void *) malloc(size);
-    return (buf);
-void superlu_free(void *addr)
-    free (addr);
- * Set up pointers for integer working arrays.
- */
-SetIWork(int m, int n, int panel_size, int *iworkptr, int **segrep,
-        int **parent, int **xplore, int **repfnz, int **panel_lsub,
-        int **xprune, int **marker)
-    *segrep = iworkptr;
-    *parent = iworkptr + m;
-    *xplore = *parent + m;
-    *repfnz = *xplore + m;
-    *panel_lsub = *repfnz + panel_size * m;
-    *xprune = *panel_lsub + panel_size * m;
-    *marker = *xprune + n;
-    ifill (*repfnz, m * panel_size, EMPTY);
-    ifill (*panel_lsub, m * panel_size, EMPTY);
-copy_mem_int(int howmany, void *old, void *new)
-    register int i;
-    int *iold = old;
-    int *inew = new;
-    for (i = 0; i < howmany; i++) inew[i] = iold[i];
-user_bcopy(char *src, char *dest, int bytes)
-    char *s_ptr, *d_ptr;
-    s_ptr = src + bytes - 1;
-    d_ptr = dest + bytes - 1;
-    for (; d_ptr >= dest; --s_ptr, --d_ptr ) *d_ptr = *s_ptr;
-int *intMalloc(int n)
-    int *buf;
-    buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC fails for buf in intMalloc()");
-    }
-    return (buf);
-int *intCalloc(int n)
-    int *buf;
-    register int i;
-    buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC fails for buf in intCalloc()");
-    }
-    for (i = 0; i < n; ++i) buf[i] = 0;
-    return (buf);
-#if 0
-    int p;
-    printf("Check expanders:\n");
-    for (p = 0; p < NO_MEMTYPE; p++) {
-       printf("type %d, size %d, mem %d\n",
-              p, expanders[p].size, (int)expanders[p].mem);
-    }
-    return 0;
-    printf("Stack: size %d, used %d, top1 %d, top2 %d\n",
-          stack.size, stack.used, stack.top1, stack.top2);
-    return 0;
-PrintStack(char *msg, GlobalLU_t *Glu)
-    int i;
-    int *xlsub, *lsub, *xusub, *usub;
-    xlsub = Glu->xlsub;
-    lsub  = Glu->lsub;
-    xusub = Glu->xusub;
-    usub  = Glu->usub;
-    printf("%s\n", msg);
-/*    printf("\nUCOL: ");
-    for (i = 0; i < xusub[ndim]; ++i)
-       printf("%f  ", ucol[i]);
-    printf("\nLSUB: ");
-    for (i = 0; i < xlsub[ndim]; ++i)
-       printf("%d  ", lsub[i]);
-    printf("\nUSUB: ");
-    for (i = 0; i < xusub[ndim]; ++i)
-       printf("%d  ", usub[i]);
-    printf("\n");*/
-    return 0;
diff --git a/superlu/mmd.c b/superlu/mmd.c
deleted file mode 100644
index 1fb196c9..00000000
--- a/superlu/mmd.c
+++ /dev/null
@@ -1,1021 +0,0 @@
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-typedef int shortint;
-/* *************************************************************** */
-/* *************************************************************** */
-/* ****     GENMMD ..... MULTIPLE MINIMUM EXTERNAL DEGREE     **** */
-/* *************************************************************** */
-/* *************************************************************** */
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*        EXTERNAL DEGREE. */
-/*        --------------------------------------------- */
-/*        DESTROYED. */
-/*        --------------------------------------------- */
-/*                 NODES. */
-/*        INVP   - THE INVERSE OF PERM. */
-/* *************************************************************** */
-/* Subroutine */ int genmmd_(int *neqns, int *xadj, shortint *adjncy, 
-       shortint *invp, shortint *perm, int *delta, shortint *dhead, 
-       shortint *qsize, shortint *llist, shortint *marker, int *maxint, 
-       int *nofsub)
-    /* System generated locals */
-    int i__1;
-    /* Local variables */
-    static int mdeg, ehead, i, mdlmt, mdnode;
-    extern /* Subroutine */ int mmdelm_(int *, int *, shortint *, 
-           shortint *, shortint *, shortint *, shortint *, shortint *, 
-           shortint *, int *, int *), mmdupd_(int *, int *, 
-           int *, shortint *, int *, int *, shortint *, shortint 
-           *, shortint *, shortint *, shortint *, shortint *, int *, 
-           int *), mmdint_(int *, int *, shortint *, shortint *, 
-           shortint *, shortint *, shortint *, shortint *, shortint *), 
-           mmdnum_(int *, shortint *, shortint *, shortint *);
-    static int nextmd, tag, num;
-/* *************************************************************** */
-/* *************************************************************** */
-    /* Parameter adjustments */
-    --marker;
-    --llist;
-    --qsize;
-    --dhead;
-    --perm;
-    --invp;
-    --adjncy;
-    --xadj;
-    /* Function Body */
-    if (*neqns <= 0) {
-       return 0;
-    }
-/*        ------------------------------------------------ */
-/*        ------------------------------------------------ */
-    *nofsub = 0;
-    mmdint_(neqns, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
-           qsize[1], &llist[1], &marker[1]);
-/*        ---------------------------------------------- */
-/*        ---------------------------------------------- */
-    num = 1;
-/*        ----------------------------- */
-/*        ----------------------------- */
-    nextmd = dhead[1];
-    if (nextmd <= 0) {
-       goto L200;
-    }
-    mdnode = nextmd;
-    nextmd = invp[mdnode];
-    marker[mdnode] = *maxint;
-    invp[mdnode] = -num;
-    ++num;
-    goto L100;
-/*        ---------------------------------------- */
-/*        ---------------------------------------- */
-    if (num > *neqns) {
-       goto L1000;
-    }
-    tag = 1;
-    dhead[1] = 0;
-    mdeg = 2;
-    if (dhead[mdeg] > 0) {
-       goto L400;
-    }
-    ++mdeg;
-    goto L300;
-/*            ------------------------------------------------- */
-/*            ------------------------------------------------- */
-    mdlmt = mdeg + *delta;
-    ehead = 0;
-    mdnode = dhead[mdeg];
-    if (mdnode > 0) {
-       goto L600;
-    }
-    ++mdeg;
-    if (mdeg > mdlmt) {
-       goto L900;
-    }
-    goto L500;
-/*                ---------------------------------------- */
-/*                ---------------------------------------- */
-    nextmd = invp[mdnode];
-    dhead[mdeg] = nextmd;
-    if (nextmd > 0) {
-       perm[nextmd] = -mdeg;
-    }
-    invp[mdnode] = -num;
-    *nofsub = *nofsub + mdeg + qsize[mdnode] - 2;
-    if (num + qsize[mdnode] > *neqns) {
-       goto L1000;
-    }
-/*                ---------------------------------------------- */
-/*                ---------------------------------------------- */
-    ++tag;
-    if (tag < *maxint) {
-       goto L800;
-    }
-    tag = 1;
-    i__1 = *neqns;
-    for (i = 1; i <= i__1; ++i) {
-       if (marker[i] < *maxint) {
-           marker[i] = 0;
-       }
-/* L700: */
-    }
-    mmdelm_(&mdnode, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
-           qsize[1], &llist[1], &marker[1], maxint, &tag);
-    num += qsize[mdnode];
-    llist[mdnode] = ehead;
-    ehead = mdnode;
-    if (*delta >= 0) {
-       goto L500;
-    }
-/*            ------------------------------------------- */
-/*            ------------------------------------------- */
-    if (num > *neqns) {
-       goto L1000;
-    }
-    mmdupd_(&ehead, neqns, &xadj[1], &adjncy[1], delta, &mdeg, &dhead[1], &
-           invp[1], &perm[1], &qsize[1], &llist[1], &marker[1], maxint, &tag)
-           ;
-    goto L300;
-    mmdnum_(neqns, &perm[1], &invp[1], &qsize[1]);
-    return 0;
-} /* genmmd_ */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*        ALGORITHM. */
-/*        LLIST  - LINKED LIST. */
-/*        MARKER - MARKER VECTOR. */
-/* *************************************************************** */
-/* Subroutine */ int mmdint_(int *neqns, int *xadj, shortint *adjncy, 
-       shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize, 
-       shortint *llist, shortint *marker)
-    /* System generated locals */
-    int i__1;
-    /* Local variables */
-    static int ndeg, node, fnode;
-/* *************************************************************** */
-/* *************************************************************** */
-    /* Parameter adjustments */
-    --marker;
-    --llist;
-    --qsize;
-    --dbakw;
-    --dforw;
-    --dhead;
-    --adjncy;
-    --xadj;
-    /* Function Body */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       dhead[node] = 0;
-       qsize[node] = 1;
-       marker[node] = 0;
-       llist[node] = 0;
-/* L100: */
-    }
-/*        ------------------------------------------ */
-/*        ------------------------------------------ */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       ndeg = xadj[node + 1] - xadj[node] + 1;
-       fnode = dhead[ndeg];
-       dforw[node] = fnode;
-       dhead[ndeg] = node;
-       if (fnode > 0) {
-           dbakw[fnode] = node;
-       }
-       dbakw[node] = -ndeg;
-/* L200: */
-    }
-    return 0;
-} /* mmdint_ */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *************************************************************** */
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*        ELIMINATION GRAPH. */
-/*                 INT. */
-/*        TAG    - TAG VALUE. */
-/*        QSIZE  - SIZE OF SUPERNODE. */
-/*        MARKER - MARKER VECTOR. */
-/* *************************************************************** */
-/* Subroutine */ int mmdelm_(int *mdnode, int *xadj, shortint *adjncy,
-        shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize, 
-       shortint *llist, shortint *marker, int *maxint, int *tag)
-    /* System generated locals */
-    int i__1, i__2;
-    /* Local variables */
-    static int node, link, rloc, rlmt, i, j, nabor, rnode, elmnt, xqnbr, 
-           istop, jstop, istrt, jstrt, nxnode, pvnode, nqnbrs, npv;
-/* *************************************************************** */
-/* *************************************************************** */
-/*        ----------------------------------------------- */
-/*        ----------------------------------------------- */
-    /* Parameter adjustments */
-    --marker;
-    --llist;
-    --qsize;
-    --dbakw;
-    --dforw;
-    --dhead;
-    --adjncy;
-    --xadj;
-    /* Function Body */
-    marker[*mdnode] = *tag;
-    istrt = xadj[*mdnode];
-    istop = xadj[*mdnode + 1] - 1;
-/*        ------------------------------------------------------- */
-/*        ------------------------------------------------------- */
-    elmnt = 0;
-    rloc = istrt;
-    rlmt = istop;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       nabor = adjncy[i];
-       if (nabor == 0) {
-           goto L300;
-       }
-       if (marker[nabor] >= *tag) {
-           goto L200;
-       }
-       marker[nabor] = *tag;
-       if (dforw[nabor] < 0) {
-           goto L100;
-       }
-       adjncy[rloc] = nabor;
-       ++rloc;
-       goto L200;
-       llist[nabor] = elmnt;
-       elmnt = nabor;
-       ;
-    }
-/*            ----------------------------------------------------- */
-/*            ----------------------------------------------------- */
-    if (elmnt <= 0) {
-       goto L1000;
-    }
-    adjncy[rlmt] = -elmnt;
-    link = elmnt;
-    jstrt = xadj[link];
-    jstop = xadj[link + 1] - 1;
-    i__1 = jstop;
-    for (j = jstrt; j <= i__1; ++j) {
-       node = adjncy[j];
-       link = -node;
-       if (node < 0) {
-           goto L400;
-       } else if (node == 0) {
-           goto L900;
-       } else {
-           goto L500;
-       }
-       if (marker[node] >= *tag || dforw[node] < 0) {
-           goto L800;
-       }
-       marker[node] = *tag;
-/*                            --------------------------------- */
-/*                            USE STORAGE FROM ELIMINATED NODES */
-/*                            IF NECESSARY. */
-/*                            --------------------------------- */
-       if (rloc < rlmt) {
-           goto L700;
-       }
-       link = -adjncy[rlmt];
-       rloc = xadj[link];
-       rlmt = xadj[link + 1] - 1;
-       goto L600;
-       adjncy[rloc] = node;
-       ++rloc;
-       ;
-    }
-    elmnt = llist[elmnt];
-    goto L300;
-    if (rloc <= rlmt) {
-       adjncy[rloc] = 0;
-    }
-/*        -------------------------------------------------------- */
-/*        -------------------------------------------------------- */
-    link = *mdnode;
-    istrt = xadj[link];
-    istop = xadj[link + 1] - 1;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       rnode = adjncy[i];
-       link = -rnode;
-       if (rnode < 0) {
-           goto L1100;
-       } else if (rnode == 0) {
-           goto L1800;
-       } else {
-           goto L1200;
-       }
-/*                -------------------------------------------- */
-/*                IF RNODE IS IN THE DEGREE LIST STRUCTURE ... */
-/*                -------------------------------------------- */
-       pvnode = dbakw[rnode];
-       if (pvnode == 0 || pvnode == -(*maxint)) {
-           goto L1300;
-       }
-/*                    ------------------------------------- */
-/*                    THEN REMOVE RNODE FROM THE STRUCTURE. */
-/*                    ------------------------------------- */
-       nxnode = dforw[rnode];
-       if (nxnode > 0) {
-           dbakw[nxnode] = pvnode;
-       }
-       if (pvnode > 0) {
-           dforw[pvnode] = nxnode;
-       }
-       npv = -pvnode;
-       if (pvnode < 0) {
-           dhead[npv] = nxnode;
-       }
-/*                ---------------------------------------- */
-/*                ---------------------------------------- */
-       jstrt = xadj[rnode];
-       jstop = xadj[rnode + 1] - 1;
-       xqnbr = jstrt;
-       i__2 = jstop;
-       for (j = jstrt; j <= i__2; ++j) {
-           nabor = adjncy[j];
-           if (nabor == 0) {
-               goto L1500;
-           }
-           if (marker[nabor] >= *tag) {
-               goto L1400;
-           }
-           adjncy[xqnbr] = nabor;
-           ++xqnbr;
-           ;
-       }
-/*                ---------------------------------------- */
-/*                IF NO ACTIVE NABOR AFTER THE PURGING ... */
-/*                ---------------------------------------- */
-       nqnbrs = xqnbr - jstrt;
-       if (nqnbrs > 0) {
-           goto L1600;
-       }
-/*                    ----------------------------- */
-/*                    THEN MERGE RNODE WITH MDNODE. */
-/*                    ----------------------------- */
-       qsize[*mdnode] += qsize[rnode];
-       qsize[rnode] = 0;
-       marker[rnode] = *maxint;
-       dforw[rnode] = -(*mdnode);
-       dbakw[rnode] = -(*maxint);
-       goto L1700;
-/*                -------------------------------------- */
-/*                ADD MDNODE AS A NABOR OF RNODE. */
-/*                -------------------------------------- */
-       dforw[rnode] = nqnbrs + 1;
-       dbakw[rnode] = 0;
-       adjncy[xqnbr] = *mdnode;
-       ++xqnbr;
-       if (xqnbr <= jstop) {
-           adjncy[xqnbr] = 0;
-       }
-       ;
-    }
-    return 0;
-} /* mmdelm_ */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *****     MMDUPD ..... MULTIPLE MINIMUM DEGREE UPDATE     ***** */
-/* *************************************************************** */
-/* *************************************************************** */
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*                 NODES (I.E., NEWLY FORMED ELEMENTS). */
-/*                 INTEGER. */
-/*        QSIZE  - SIZE OF SUPERNODE. */
-/*        TAG    - TAG VALUE. */
-/* *************************************************************** */
-/* Subroutine */ int mmdupd_(int *ehead, int *neqns, int *xadj, 
-       shortint *adjncy, int *delta, int *mdeg, shortint *dhead, 
-       shortint *dforw, shortint *dbakw, shortint *qsize, shortint *llist, 
-       shortint *marker, int *maxint, int *tag)
-    /* System generated locals */
-    int i__1, i__2;
-    /* Local variables */
-    static int node, mtag, link, mdeg0, i, j, enode, fnode, nabor, elmnt, 
-           istop, jstop, q2head, istrt, jstrt, qxhead, iq2, deg, deg0;
-/* *************************************************************** */
-/* *************************************************************** */
-    /* Parameter adjustments */
-    --marker;
-    --llist;
-    --qsize;
-    --dbakw;
-    --dforw;
-    --dhead;
-    --adjncy;
-    --xadj;
-    /* Function Body */
-    mdeg0 = *mdeg + *delta;
-    elmnt = *ehead;
-/*            ------------------------------------------------------- */
-/*            (RESET TAG VALUE IF NECESSARY.) */
-/*            ------------------------------------------------------- */
-    if (elmnt <= 0) {
-       return 0;
-    }
-    mtag = *tag + mdeg0;
-    if (mtag < *maxint) {
-       goto L300;
-    }
-    *tag = 1;
-    i__1 = *neqns;
-    for (i = 1; i <= i__1; ++i) {
-       if (marker[i] < *maxint) {
-           marker[i] = 0;
-       }
-/* L200: */
-    }
-    mtag = *tag + mdeg0;
-/*            --------------------------------------------- */
-/*            --------------------------------------------- */
-    q2head = 0;
-    qxhead = 0;
-    deg0 = 0;
-    link = elmnt;
-    istrt = xadj[link];
-    istop = xadj[link + 1] - 1;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       enode = adjncy[i];
-       link = -enode;
-       if (enode < 0) {
-           goto L400;
-       } else if (enode == 0) {
-           goto L800;
-       } else {
-           goto L500;
-       }
-       if (qsize[enode] == 0) {
-           goto L700;
-       }
-       deg0 += qsize[enode];
-       marker[enode] = mtag;
-/*                        ---------------------------------- */
-/*                        IF ENODE REQUIRES A DEGREE UPDATE, */
-/*                        THEN DO THE FOLLOWING. */
-/*                        ---------------------------------- */
-       if (dbakw[enode] != 0) {
-           goto L700;
-       }
-/*                            --------------------------------------- 
-/*                            PLACE EITHER IN QXHEAD OR Q2HEAD LISTS. 
-/*                            --------------------------------------- 
-       if (dforw[enode] == 2) {
-           goto L600;
-       }
-       llist[enode] = qxhead;
-       qxhead = enode;
-       goto L700;
-       llist[enode] = q2head;
-       q2head = enode;
-       ;
-    }
-/*            -------------------------------------------- */
-/*            -------------------------------------------- */
-    enode = q2head;
-    iq2 = 1;
-    if (enode <= 0) {
-       goto L1500;
-    }
-    if (dbakw[enode] != 0) {
-       goto L2200;
-    }
-    ++(*tag);
-    deg = deg0;
-/*                    ------------------------------------------ */
-/*                    ------------------------------------------ */
-    istrt = xadj[enode];
-    nabor = adjncy[istrt];
-    if (nabor == elmnt) {
-       nabor = adjncy[istrt + 1];
-    }
-/*                    ------------------------------------------------ */
-/*                    ------------------------------------------------ */
-    link = nabor;
-    if (dforw[nabor] < 0) {
-       goto L1000;
-    }
-    deg += qsize[nabor];
-    goto L2100;
-/*                        -------------------------------------------- */
-/*                        OTHERWISE, FOR EACH NODE IN THE 2ND ELEMENT, */
-/*                        DO THE FOLLOWING. */
-/*                        -------------------------------------------- */
-    istrt = xadj[link];
-    istop = xadj[link + 1] - 1;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       node = adjncy[i];
-       link = -node;
-       if (node == enode) {
-           goto L1400;
-       }
-       if (node < 0) {
-           goto L1000;
-       } else if (node == 0) {
-           goto L2100;
-       } else {
-           goto L1100;
-       }
-       if (qsize[node] == 0) {
-           goto L1400;
-       }
-       if (marker[node] >= *tag) {
-           goto L1200;
-       }
-/*                                -----------------------------------
--- */
-/*                                CASE WHEN NODE IS NOT YET CONSIDERED
-. */
-/*                                -----------------------------------
--- */
-       marker[node] = *tag;
-       deg += qsize[node];
-       goto L1400;
-/*                            ----------------------------------------
- */
-/*                            CASE WHEN NODE IS INDISTINGUISHABLE FROM
- */
-/*                            ENODE.  MERGE THEM INTO A NEW SUPERNODE.
- */
-/*                            ----------------------------------------
- */
-       if (dbakw[node] != 0) {
-           goto L1400;
-       }
-       if (dforw[node] != 2) {
-           goto L1300;
-       }
-       qsize[enode] += qsize[node];
-       qsize[node] = 0;
-       marker[node] = *maxint;
-       dforw[node] = -enode;
-       dbakw[node] = -(*maxint);
-       goto L1400;
-/*                            -------------------------------------- 
-/*                            CASE WHEN NODE IS OUTMATCHED BY ENODE. 
-/*                            -------------------------------------- 
-       if (dbakw[node] == 0) {
-           dbakw[node] = -(*maxint);
-       }
-       ;
-    }
-    goto L2100;
-/*                ------------------------------------------------ */
-/*                ------------------------------------------------ */
-    enode = qxhead;
-    iq2 = 0;
-    if (enode <= 0) {
-       goto L2300;
-    }
-    if (dbakw[enode] != 0) {
-       goto L2200;
-    }
-    ++(*tag);
-    deg = deg0;
-/*                        --------------------------------- */
-/*                        FOR EACH UNMARKED NABOR OF ENODE, */
-/*                        DO THE FOLLOWING. */
-/*                        --------------------------------- */
-    istrt = xadj[enode];
-    istop = xadj[enode + 1] - 1;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       nabor = adjncy[i];
-       if (nabor == 0) {
-           goto L2100;
-       }
-       if (marker[nabor] >= *tag) {
-           goto L2000;
-       }
-       marker[nabor] = *tag;
-       link = nabor;
-/*                                ------------------------------ */
-/*                                IF UNELIMINATED, INCLUDE IT IN */
-/*                                DEG COUNT. */
-/*                                ------------------------------ */
-       if (dforw[nabor] < 0) {
-           goto L1700;
-       }
-       deg += qsize[nabor];
-       goto L2000;
-/*                                    ------------------------------- 
-/*                                    IF ELIMINATED, INCLUDE UNMARKED 
-/*                                    NODES IN THIS ELEMENT INTO THE 
-/*                                    DEGREE COUNT. */
-/*                                    ------------------------------- 
-       jstrt = xadj[link];
-       jstop = xadj[link + 1] - 1;
-       i__2 = jstop;
-       for (j = jstrt; j <= i__2; ++j) {
-           node = adjncy[j];
-           link = -node;
-           if (node < 0) {
-               goto L1700;
-           } else if (node == 0) {
-               goto L2000;
-           } else {
-               goto L1800;
-           }
-           if (marker[node] >= *tag) {
-               goto L1900;
-           }
-           marker[node] = *tag;
-           deg += qsize[node];
-           ;
-       }
-       ;
-    }
-/*                    ------------------------------------------- */
-/*                    STRUCTURE, AND MDEG (MIN DEG) IF NECESSARY. */
-/*                    ------------------------------------------- */
-    deg = deg - qsize[enode] + 1;
-    fnode = dhead[deg];
-    dforw[enode] = fnode;
-    dbakw[enode] = -deg;
-    if (fnode > 0) {
-       dbakw[fnode] = enode;
-    }
-    dhead[deg] = enode;
-    if (deg < *mdeg) {
-       *mdeg = deg;
-    }
-/*                    ---------------------------------- */
-/*                    GET NEXT ENODE IN CURRENT ELEMENT. */
-/*                    ---------------------------------- */
-    enode = llist[enode];
-    if (iq2 == 1) {
-       goto L900;
-    }
-    goto L1600;
-/*            ----------------------------- */
-/*            GET NEXT ELEMENT IN THE LIST. */
-/*            ----------------------------- */
-    *tag = mtag;
-    elmnt = llist[elmnt];
-    goto L100;
-} /* mmdupd_ */
-/* *************************************************************** */
-/* *************************************************************** */
-/* *****     MMDNUM ..... MULTI MINIMUM DEGREE NUMBERING     ***** */
-/* *************************************************************** */
-/* *************************************************************** */
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*                 IF QSIZE(NODE)=0, THEN NODE HAS BEEN MERGED */
-/*                 INTO THE NODE -INVP(NODE); OTHERWISE, */
-/*                 -INVP(NODE) IS ITS INVERSE LABELLING. */
-/* *************************************************************** */
-/* Subroutine */ int mmdnum_(int *neqns, shortint *perm, shortint *invp, 
-       shortint *qsize)
-    /* System generated locals */
-    int i__1;
-    /* Local variables */
-    static int node, root, nextf, father, nqsize, num;
-/* *************************************************************** */
-/* *************************************************************** */
-    /* Parameter adjustments */
-    --qsize;
-    --invp;
-    --perm;
-    /* Function Body */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       nqsize = qsize[node];
-       if (nqsize <= 0) {
-           perm[node] = invp[node];
-       }
-       if (nqsize > 0) {
-           perm[node] = -invp[node];
-       }
-/* L100: */
-    }
-/*        ------------------------------------------------------ */
-/*        ------------------------------------------------------ */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       if (perm[node] > 0) {
-           goto L500;
-       }
-/*                ----------------------------------------- */
-/*                NOT BEEN MERGED, CALL IT ROOT. */
-/*                ----------------------------------------- */
-       father = node;
-       if (perm[father] > 0) {
-           goto L300;
-       }
-       father = -perm[father];
-       goto L200;
-/*                ----------------------- */
-/*                NUMBER NODE AFTER ROOT. */
-/*                ----------------------- */
-       root = father;
-       num = perm[root] + 1;
-       invp[node] = -num;
-       perm[root] = num;
-/*                ------------------------ */
-/*                SHORTEN THE MERGED TREE. */
-/*                ------------------------ */
-       father = node;
-       nextf = -perm[father];
-       if (nextf <= 0) {
-           goto L500;
-       }
-       perm[father] = -root;
-       father = nextf;
-       goto L400;
-       ;
-    }
-/*        ---------------------- */
-/*        READY TO COMPUTE PERM. */
-/*        ---------------------- */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       num = -invp[node];
-       invp[node] = num;
-       perm[num] = node;
-/* L600: */
-    }
-    return 0;
-} /* mmdnum_ */
diff --git a/superlu/relax_snode.c b/superlu/relax_snode.c
deleted file mode 100644
index 8937ac03..00000000
--- a/superlu/relax_snode.c
+++ /dev/null
@@ -1,80 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_ddefs.h"
-relax_snode (
-            const     int n,
-            int       *et,           /* column elimination tree */
-            const int relax_columns, /* max no of columns allowed in a
-                                        relaxed snode */
-            int       *descendants,  /* no of descendants of each node
-                                        in the etree */
-            int       *relax_end     /* last column in a supernode */
-            )
- * Purpose
- * =======
- *    relax_snode() - Identify the initial relaxed supernodes, assuming that 
- *    the matrix has been reordered according to the postorder of the etree.
- *
- */ 
-    register int j, parent;
-    register int snode_start;  /* beginning of a snode */
-    ifill (relax_end, n, EMPTY);
-    for (j = 0; j < n; j++) descendants[j] = 0;
-    /* Compute the number of descendants of each node in the etree */
-    for (j = 0; j < n; j++) {
-       parent = et[j];
-       if ( parent != n )  /* not the dummy root */
-           descendants[parent] += descendants[j] + 1;
-    }
-    /* Identify the relaxed supernodes by postorder traversal of the etree. */
-    for (j = 0; j < n; ) { 
-       parent = et[j];
-        snode_start = j;
-       while ( parent != n && descendants[parent] < relax_columns ) {
-           j = parent;
-           parent = et[j];
-       }
-       /* Found a supernode with j being the last column. */
-       relax_end[snode_start] = j;             /* Last column is recorded */
-       j++;
-       /* Search for a new leaf */
-       while ( descendants[j] != 0 && j < n ) j++;
-    }
-    /*printf("No of relaxed snodes: %d; relaxed columns: %d\n", 
-               nsuper, no_relaxed_col); */
diff --git a/superlu/scolumn_bmod.c b/superlu/scolumn_bmod.c
deleted file mode 100644
index 64a1d0b4..00000000
--- a/superlu/scolumn_bmod.c
+++ /dev/null
@@ -1,360 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_sdefs.h"
-extern void strsv_();
-extern void sgemv_();
- * Function prototypes 
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-/* Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-scolumn_bmod (
-            const int  jcol,     /* in */
-            const int  nseg,     /* in */
-            float     *dense,    /* in */
-            float     *tempv,    /* working array */
-            int        *segrep,  /* in */
-            int        *repfnz,  /* in */
-            int        fpanelc,  /* in -- first column in the current panel */
-            GlobalLU_t *Glu,     /* modified */
-            SuperLUStat_t *stat  /* output */
-            )
- * Purpose:
- * ========
- *    Performs numeric block updates (sup-col) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-    int         incx = 1, incy = 1;
-    float      alpha, beta;
-    /* krep = representative of current k-th supernode
-     * fsupc = first supernodal column
-     * nsupc = no of columns in supernode
-     * nsupr = no of rows in supernode (used as leading dimension)
-     * luptr = location of supernodal LU-block in storage
-     * kfnz = first nonz in the k-th supernodal segment
-     * no_zeros = no of leading zeros in a supernodal U-segment
-     */
-    float       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          fsupc, nsupc, nsupr, segsze;
-    int          nrow;   /* No of rows in the matrix of matrix-vector */
-    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
-    register int lptr, kfnz, isub, irow, i;
-    register int no_zeros, new_next; 
-    int          ufirst, nextlu;
-    int          fst_col; /* First column within small LU update */
-    int          d_fsupc; /* Distance between the first column of the current
-                            panel and the first column of the current snode. */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    float       *lusup;
-    int          *xlusup;
-    int          nzlumax;
-    float       *tempv1;
-    float      zero = 0.0;
-    float      one = 1.0;
-    float      none = -1.0;
-    int          mem_error;
-    flops_t      *ops = stat->ops;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nzlumax = Glu->nzlumax;
-    jcolp1 = jcol + 1;
-    jsupno = supno[jcol];
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k];
-       k--;
-       ksupno = supno[krep];
-       if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
-           fsupc = xsup[ksupno];
-           fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-           /* Distance from the current supernode to the current panel; 
-              d_fsupc=0 if fsupc > fpanelc. */
-           d_fsupc = fst_col - fsupc; 
-           luptr = xlusup[fst_col] + d_fsupc;
-           lptr = xlsub[fsupc] + d_fsupc;
-           kfnz = repfnz[krep];
-           kfnz = SUPERLU_MAX ( kfnz, fpanelc );
-           segsze = krep - kfnz + 1;
-           nsupc = krep - fst_col + 1;
-           nsupr = xlsub[fsupc+1] - xlsub[fsupc];      /* Leading dimension */
-           nrow = nsupr - d_fsupc - nsupc;
-           krep_ind = lptr + nsupc - 1;
-           ops[TRSV] += segsze * (segsze - 1);
-           ops[GEMV] += 2 * nrow * segsze;
-           /* 
-            * Case 1: Update U-segment of size 1 -- col-col update 
-            */
-           if ( segsze == 1 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc;
-               for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                   irow = lsub[i];
-                   dense[irow] -=  ukj*lusup[luptr];
-                   luptr++;
-               }
-           } else if ( segsze <= 3 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc-1;
-               ukj1 = dense[lsub[krep_ind - 1]];
-               luptr1 = luptr - nsupr;
-               if ( segsze == 2 ) { /* Case 2: 2cols-col update */
-                   ukj -= ukj1 * lusup[luptr1];
-                   dense[lsub[krep_ind]] = ukj;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       dense[irow] -= ( ukj*lusup[luptr]
-                                       + ukj1*lusup[luptr1] );
-                   }
-               } else { /* Case 3: 3cols-col update */
-                   ukj2 = dense[lsub[krep_ind - 2]];
-                   luptr2 = luptr1 - nsupr;
-                   ukj1 -= ukj2 * lusup[luptr2-1];
-                   ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                   dense[lsub[krep_ind]] = ukj;
-                   dense[lsub[krep_ind-1]] = ukj1;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       luptr2++;
-                       dense[irow] -= ( ukj*lusup[luptr]
-                            + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                   }
-               }
-           } else {
-               /*
-                * Case: sup-col update
-                * Perform a triangular solve and block update,
-                * then scatter the result of sup-col update to dense
-                */
-               no_zeros = kfnz - fst_col;
-               /* Copy U[*,j] segment from dense[*] to tempv[*] */
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   tempv[i] = dense[irow];
-                   ++isub; 
-               }
-               /* Dense triangular solve -- start effective triangle */
-               luptr += nsupr * no_zeros + no_zeros; 
-#ifdef _CRAY
-               STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-               strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-                alpha = one;
-                beta = zero;
-#ifdef _CRAY
-               SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-               sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-               slsolve ( nsupr, segsze, &lusup[luptr], tempv );
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-               smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
-                /* Scatter tempv[] into SPA dense[] as a temporary storage */
-                isub = lptr + no_zeros;
-                for (i = 0; i < segsze; i++) {
-                    irow = lsub[isub];
-                    dense[irow] = tempv[i];
-                    tempv[i] = zero;
-                    ++isub;
-                }
-               /* Scatter tempv1[] into SPA dense[] */
-               for (i = 0; i < nrow; i++) {
-                   irow = lsub[isub];
-                   dense[irow] -= tempv1[i];
-                   tempv1[i] = zero;
-                   ++isub;
-               }
-           }
-       } /* if jsupno ... */
-    } /* for each segment... */
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    nextlu = xlusup[jcol];
-    fsupc = xsup[jsupno];
-    /* Copy the SPA dense into L\U[*,j] */
-    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
-    while ( new_next > nzlumax ) {
-       if (mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
-           return (mem_error);
-       lusup = Glu->lusup;
-       lsub = Glu->lsub;
-    }
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = zero;
-       ++nextlu;
-    }
-    xlusup[jcolp1] = nextlu;   /* Close L\U[*,jcol] */
-    /* For more updates within the panel (also within the current supernode), 
-     * should start from the first column of the panel, or the first column 
-     * of the supernode, whichever is bigger. There are 2 cases:
-     *    1) fsupc < fpanelc, then fst_col := fpanelc
-     *    2) fsupc >= fpanelc, then fst_col := fsupc
-     */
-    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-    if ( fst_col < jcol ) {
-       /* Distance between the current supernode and the current panel.
-          d_fsupc=0 if fsupc >= fpanelc. */
-       d_fsupc = fst_col - fsupc;
-       lptr = xlsub[fsupc] + d_fsupc;
-       luptr = xlusup[fst_col] + d_fsupc;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
-       nsupc = jcol - fst_col; /* Excluding jcol */
-       nrow = nsupr - d_fsupc - nsupc;
-       /* Points to the beginning of jcol in snode L\U(jsupno) */
-       ufirst = xlusup[jcol] + d_fsupc;        
-       ops[TRSV] += nsupc * (nsupc - 1);
-       ops[GEMV] += 2 * nrow * nsupc;
-#ifdef _CRAY
-       STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-       strsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-       alpha = none; beta = one; /* y := beta*y + alpha*A*x */
-#ifdef _CRAY
-       SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
-               &lusup[ufirst], tempv );
-        /* Copy updates from tempv[*] into lusup[*] */
-       isub = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           lusup[isub] -= tempv[i];
-           tempv[i] = 0.0;
-           ++isub;
-       }
-    } /* if fst_col < jcol ... */ 
-    return 0;
diff --git a/superlu/scolumn_dfs.c b/superlu/scolumn_dfs.c
deleted file mode 100644
index a9bd3c2f..00000000
--- a/superlu/scolumn_dfs.c
+++ /dev/null
@@ -1,278 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_sdefs.h"
-/* What type of supernodes we want */
-#define T2_SUPER
-          const int  m,         /* in - number of rows in the matrix */
-          const int  jcol,      /* in */
-          int        *perm_r,   /* in */
-          int        *nseg,     /* modified - with new segments appended */
-          int        *lsub_col, /* in - defines the RHS vector to start the 
dfs */
-          int        *segrep,   /* modified - with new segments appended */
-          int        *repfnz,   /* modified */
-          int        *xprune,   /* modified */
-          int        *marker,   /* modified */
-          int        *parent,   /* working array */
-          int        *xplore,   /* working array */
-          GlobalLU_t *Glu       /* modified */
-          )
- * Purpose
- * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
- *   decide the supernode boundary.
- *
- *   This routine does not use numeric values, but only use the RHS 
- *   row indices to start the dfs.
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives. The routine returns a list of such supernodal 
- *   representatives in topological order of the dfs that generates them.
- *   The location of the first nonzero in each such supernodal segment
- *   (supernodal entry location) is also returned.
- *
- * Local parameters
- * ================
- *   nseg: no of segments in current U[*,j]
- *   jsuper: jsuper=EMPTY if column j does not belong to the same
- *     supernode as j-1. Otherwise, jsuper=nsuper.
- *
- *   marker2: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- * Return value
- * ============
- *     0  success;
- *   > 0  number of bytes allocated when run out of space.
- *
- */
-    int     jcolp1, jcolm1, jsuper, nsuper, nextl;
-    int     k, krep, krow, kmark, kperm;
-    int     *marker2;           /* Used for small panel LU */
-    int            fsupc;              /* First column of a snode */
-    int     myfnz;             /* First nonz column of a U-segment */
-    int            chperm, chmark, chrep, kchild;
-    int     xdfs, maxdfs, kpar, oldrep;
-    int     jptr, jm1ptr;
-    int     ito, ifrom, istop; /* Used to compress row subscripts */
-    int     mem_error;
-    int     *xsup, *supno, *lsub, *xlsub;
-    int     nzlmax;
-    static  int  first = 1, maxsuper;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       first = 0;
-    }
-    jcolp1  = jcol + 1;
-    jcolm1  = jcol - 1;
-    nsuper  = supno[jcol];
-    jsuper  = nsuper;
-    nextl   = xlsub[jcol];
-    marker2 = &marker[2*m];
-    /* For each nonzero in A[*,jcol] do dfs */
-    for (k = 0; lsub_col[k] != EMPTY; k++) {
-       krow = lsub_col[k];
-       lsub_col[k] = EMPTY;
-       kmark = marker2[krow];          
-       /* krow was visited before, go to the next nonz */
-        if ( kmark == jcol ) continue; 
-       /* For each unmarked nbr krow of jcol
-        *      krow is in L: place it in structure of L[*,jcol]
-        */
-       marker2[krow] = jcol;
-       kperm = perm_r[krow];
-       if ( kperm == EMPTY ) {
-           lsub[nextl++] = krow;       /* krow is indexed into A */
-           if ( nextl >= nzlmax ) {
-               if ( mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                   return (mem_error);
-               lsub = Glu->lsub;
-           }
-            if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing 
-       } else {
-           /*  krow is in U: if its supernode-rep krep
-            *  has been explored, update repfnz[*]
-            */
-           krep = xsup[supno[kperm]+1] - 1;
-           myfnz = repfnz[krep];
-           if ( myfnz != EMPTY ) {     /* Visited before */
-               if ( myfnz > kperm ) repfnz[krep] = kperm;
-               /* continue; */
-           }
-           else {
-               /* Otherwise, perform dfs starting at krep */
-               oldrep = EMPTY;
-               parent[krep] = oldrep;
-               repfnz[krep] = kperm;
-               xdfs = xlsub[krep];
-               maxdfs = xprune[krep];
-               do {
-                   /* 
-                    * For each unmarked kchild of krep 
-                    */
-                   while ( xdfs < maxdfs ) {
-                       kchild = lsub[xdfs];
-                       xdfs++;
-                       chmark = marker2[kchild];
-                       if ( chmark != jcol ) { /* Not reached yet */
-                           marker2[kchild] = jcol;
-                           chperm = perm_r[kchild];
-                           /* Case kchild is in L: place it in L[*,k] */
-                           if ( chperm == EMPTY ) {
-                               lsub[nextl++] = kchild;
-                               if ( nextl >= nzlmax ) {
-                                   if ( mem_error =
sLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) )
-                                       return (mem_error);
-                                   lsub = Glu->lsub;
-                               }
-                               if ( chmark != jcolm1 ) jsuper = EMPTY;
-                           } else {
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               chrep = xsup[supno[chperm]+1] - 1;
-                               myfnz = repfnz[chrep];
-                               if ( myfnz != EMPTY ) { /* Visited before */
-                                   if ( myfnz > chperm )
-                                       repfnz[chrep] = chperm;
-                               } else {
-                                   /* Continue dfs at super-rep of kchild */
-                                   xplore[krep] = xdfs;        
-                                   oldrep = krep;
-                                   krep = chrep; /* Go deeper down G(L^t) */
-                                   parent[krep] = oldrep;
-                                   repfnz[krep] = chperm;
-                                   xdfs = xlsub[krep];     
-                                   maxdfs = xprune[krep];
-                               } /* else */
-                          } /* else */
-                       } /* if */
-                   } /* while */
-                   /* krow has no more unexplored nbrs;
-                    *    place supernode-rep krep in postorder DFS.
-                    *    backtrack dfs to its parent
-                    */
-                   segrep[*nseg] = krep;
-                   ++(*nseg);
-                   kpar = parent[krep]; /* Pop from stack, mimic recursion */
-                   if ( kpar == EMPTY ) break; /* dfs done */
-                   krep = kpar;
-                   xdfs = xplore[krep];
-                   maxdfs = xprune[krep];
-               } while ( kpar != EMPTY );      /* Until empty stack */
-           } /* else */
-       } /* else */
-    } /* for each nonzero ... */
-    /* Check to see if j belongs in the same supernode as j-1 */
-    if ( jcol == 0 ) { /* Do nothing for column 0 */
-       nsuper = supno[0] = 0;
-    } else {
-       fsupc = xsup[nsuper];
-       jptr = xlsub[jcol];     /* Not compressed yet */
-       jm1ptr = xlsub[jcolm1];
-#ifdef T2_SUPER
-       if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
-       /* Make sure the number of columns in a supernode doesn't
-          exceed threshold. */
-       if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
-       /* If jcol starts a new supernode, reclaim storage space in
-        * lsub from the previous supernode. Note we only store
-        * the subscript set of the first and last columns of
-        * a supernode. (first for num values, last for pruning)
-        */
-       if ( jsuper == EMPTY ) {        /* starts a new supernode */
-           if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
-               printf("  Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
-               ito = xlsub[fsupc+1];
-               xlsub[jcolm1] = ito;
-               istop = ito + jptr - jm1ptr;
-               xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
-               xlsub[jcol] = istop;
-               for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
-                   lsub[ito] = lsub[ifrom];
-               nextl = ito;            /* = istop + length(jcol) */
-           }
-           nsuper++;
-           supno[jcol] = nsuper;
-       } /* if a new supernode */
-    }  /* else: jcol > 0 */ 
-    /* Tidy up the pointers before exit */
-    xsup[nsuper+1] = jcolp1;
-    supno[jcolp1]  = nsuper;
-    xprune[jcol]   = nextl;    /* Initialize upper bound for pruning */
-    xlsub[jcolp1]  = nextl;
-    return 0;
diff --git a/superlu/scomplex.c b/superlu/scomplex.c
deleted file mode 100644
index d916f062..00000000
--- a/superlu/scomplex.c
+++ /dev/null
@@ -1,127 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * This file defines common arithmetic operations for complex type.
- */
-#include <math.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "slu_scomplex.h"
-/* Complex Division c = a/b */
-void c_div(complex *c, complex *a, complex *b)
-    float ratio, den;
-    float abr, abi, cr, ci;
-    if( (abr = b->r) < 0.)
-       abr = - abr;
-    if( (abi = b->i) < 0.)
-       abi = - abi;
-    if( abr <= abi ) {
-       if (abi == 0) {
-           fprintf(stderr, "z_div.c: division by zero\n");
-            exit(-1);
-       }         
-       ratio = b->r / b->i ;
-       den = b->i * (1 + ratio*ratio);
-       cr = (a->r*ratio + a->i) / den;
-       ci = (a->i*ratio - a->r) / den;
-    } else {
-       ratio = b->i / b->r ;
-       den = b->r * (1 + ratio*ratio);
-       cr = (a->r + a->i*ratio) / den;
-       ci = (a->i - a->r*ratio) / den;
-    }
-    c->r = cr;
-    c->i = ci;
-/* Returns sqrt(z.r^2 + z.i^2) */
-double c_abs(complex *z)
-    float temp;
-    float real = z->r;
-    float imag = z->i;
-    if (real < 0) real = -real;
-    if (imag < 0) imag = -imag;
-    if (imag > real) {
-       temp = real;
-       real = imag;
-       imag = temp;
-    }
-    if ((real+imag) == real) return(real);
-    temp = imag/real;
-    temp = real*sqrt(1.0 + temp*temp);  /*overflow!!*/
-    return (temp);
-/* Approximates the abs */
-/* Returns abs(z.r) + abs(z.i) */
-double c_abs1(complex *z)
-    float real = z->r;
-    float imag = z->i;
-    if (real < 0) real = -real;
-    if (imag < 0) imag = -imag;
-    return (real + imag);
-/* Return the exponentiation */
-void c_exp(complex *r, complex *z)
-    float expx;
-    expx = exp(z->r);
-    r->r = expx * cos(z->i);
-    r->i = expx * sin(z->i);
-/* Return the complex conjugate */
-void r_cnjg(complex *r, complex *z)
-    r->r = z->r;
-    r->i = -z->i;
-/* Return the imaginary part */
-double r_imag(complex *z)
-    return (z->i);
diff --git a/superlu/scopy_to_ucol.c b/superlu/scopy_to_ucol.c
deleted file mode 100644
index 44a237e5..00000000
--- a/superlu/scopy_to_ucol.c
+++ /dev/null
@@ -1,112 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_sdefs.h"
-             int        jcol,    /* in */
-             int        nseg,    /* in */
-             int        *segrep,  /* in */
-             int        *repfnz,  /* in */
-             int        *perm_r,  /* in */
-             float     *dense,   /* modified - reset to zero on return */
-             GlobalLU_t *Glu      /* modified */
-             )
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int ksub, krep, ksupno;
-    int i, k, kfnz, segsze;
-    int fsupc, isub, irow;
-    int jsupno, nextu;
-    int new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    float    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-    float zero = 0.0;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k--];
-       ksupno = supno[krep];
-       if ( ksupno != jsupno ) { /* Should go into ucol[] */
-           kfnz = repfnz[krep];
-           if ( kfnz != EMPTY ) {      /* Nonzero U-segment */
-               fsupc = xsup[ksupno];
-               isub = xlsub[fsupc] + kfnz - fsupc;
-               segsze = krep - kfnz + 1;
-               new_next = nextu + segsze;
-               while ( new_next > nzumax ) {
-                   if (mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax, 
-                       return (mem_error);
-                   ucol = Glu->ucol;
-                   if (mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax, 
-                       return (mem_error);
-                   usub = Glu->usub;
-                   lsub = Glu->lsub;
-               }
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   usub[nextu] = perm_r[irow];
-                   ucol[nextu] = dense[irow];
-                   dense[irow] = zero;
-                   nextu++;
-                   isub++;
-               } 
-           }
-       }
-    } /* for each segment... */
-    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
-    return 0;
diff --git a/superlu/scsum1.c b/superlu/scsum1.c
deleted file mode 100644
index 42ebfe32..00000000
--- a/superlu/scsum1.c
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "slu_Cnames.h"
-#include "slu_scomplex.h"
-double scsum1_(int *n, complex *cx, int *incx)
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-/*! @file scsum1.c
- * \brief Takes sum of the absolute values of a complex vector and returns a 
single precision result
- *
- * <pre>
- *     -- LAPACK auxiliary routine (version 2.0) --   
- *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
- *     Courant Institute, Argonne National Lab, and Rice University   
- *     October 31, 1992   
- * </pre>
- */
-    Purpose   
-    =======   
-    SCSUM1 takes the sum of the absolute values of a complex   
-    vector and returns a single precision result.   
-    Based on SCASUM from the Level 1 BLAS.   
-    The change is to use the 'genuine' absolute value.   
-    Contributed by Nick Higham for use with CLACON.   
-    Arguments   
-    =========   
-    N       (input) INT
-            The number of elements in the vector CX.   
-    CX      (input) COMPLEX array, dimension (N)   
-            The vector whose elements will be summed.   
-    INCX    (input) INT
-            The spacing between successive values of CX.  INCX > 0.   
-    ===================================================================== 
-   Parameter adjustments   
-       Function Body */
-    /* System generated locals */
-    int i__1, i__2;
-    float ret_val;
-    /* Builtin functions */
-    double c_abs(complex *);
-    /* Local variables */
-    static int i, nincx;
-    static float stemp;
-#define CX(I) cx[(I)-1]
-    ret_val = 0.f;
-    stemp = 0.f;
-    if (*n <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i = 1; *incx < 0 ? i >= nincx : i <= nincx; i += *incx) {
-/*        NEXT LINE MODIFIED. */
-       stemp += c_abs(&CX(i));
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-    i__2 = *n;
-    for (i = 1; i <= *n; ++i) {
-/*        NEXT LINE MODIFIED. */
-       stemp += c_abs(&CX(i));
-/* L30: */
-    }
-    ret_val = stemp;
-    return ret_val;
-/*     End of SCSUM1 */
-} /* scsum1_ */
diff --git a/superlu/sgscon.c b/superlu/sgscon.c
deleted file mode 100644
index cc44712e..00000000
--- a/superlu/sgscon.c
+++ /dev/null
@@ -1,155 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:  sgscon.c
- * History:     Modified from lapack routines SGECON.
- */
-#include <math.h>
-#include "slu_sdefs.h"
-sgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
-       float anorm, float *rcond, SuperLUStat_t *stat, int *info)
-    Purpose   
-    =======   
-    SGSCON estimates the reciprocal of the condition number of a general 
-    real matrix A, in either the 1-norm or the infinity-norm, using   
-    the LU factorization computed by SGETRF.   
-    An estimate is obtained for norm(inv(A)), and the reciprocal of the   
-    condition number is computed as   
-       RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    NORM    (input) char*
-            Specifies whether the 1-norm condition number or the   
-            infinity-norm condition number is required:   
-            = '1' or 'O':  1-norm;   
-            = 'I':         Infinity-norm.
-    L       (input) SuperMatrix*
-            The factor L from the factorization Pr*A*Pc=L*U as computed by
-            sgstrf(). Use compressed row subscripts storage for supernodes,
-            i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
-    U       (input) SuperMatrix*
-            The factor U from the factorization Pr*A*Pc=L*U as computed by
-            sgstrf(). Use column-wise storage scheme, i.e., U has types:
-            Stype = SLU_NC, Dtype = SLU_S, Mtype = TRU.
-    ANORM   (input) float
-            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
-            If NORM = 'I', the infinity-norm of the original matrix A.
-    RCOND   (output) float*
-            The reciprocal of the condition number of the matrix A,   
-            computed as RCOND = 1/(norm(A) * norm(inv(A))).
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-    ===================================================================== 
-    /* Local variables */
-    int    kase, kase1, onenrm, i;
-    float ainvnm;
-    float *work;
-    int    *iwork;
-    extern int srscl_(int *, float *, float *, int *);
-    extern int slacon_(int *, float *, float *, int *, float *, int *);
-    /* Test the input parameters. */
-    *info = 0;
-    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
-    if (! onenrm && ! lsame_(norm, "I")) *info = -1;
-    else if (L->nrow < 0 || L->nrow != L->ncol ||
-             L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU)
-        *info = -2;
-    else if (U->nrow < 0 || U->nrow != U->ncol ||
-             U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU) 
-       *info = -3;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("sgscon", &i);
-       return;
-    }
-    /* Quick return if possible */
-    *rcond = 0.;
-    if ( L->nrow == 0 || U->nrow == 0) {
-       *rcond = 1.;
-       return;
-    }
-    work = floatCalloc( 3*L->nrow );
-    iwork = intMalloc( L->nrow );
-    if ( !work || !iwork )
-       ABORT("Malloc fails for work arrays in sgscon.");
-    /* Estimate the norm of inv(A). */
-    ainvnm = 0.;
-    if ( onenrm ) kase1 = 1;
-    else kase1 = 2;
-    kase = 0;
-    do {
-       slacon_(&L->nrow, &work[L->nrow], &work[0], &iwork[0], &ainvnm, &kase);
-       if (kase == 0) break;
-       if (kase == kase1) {
-           /* Multiply by inv(L). */
-           sp_strsv("L", "No trans", "Unit", L, U, &work[0], stat, info);
-           /* Multiply by inv(U). */
-           sp_strsv("U", "No trans", "Non-unit", L, U, &work[0], stat, info);
-       } else {
-           /* Multiply by inv(U'). */
-           sp_strsv("U", "Transpose", "Non-unit", L, U, &work[0], stat, info);
-           /* Multiply by inv(L'). */
-           sp_strsv("L", "Transpose", "Unit", L, U, &work[0], stat, info);
-       }
-    } while ( kase != 0 );
-    /* Compute the estimate of the reciprocal condition number. */
-    if (ainvnm != 0.) *rcond = (1. / ainvnm) / anorm;
-    SUPERLU_FREE (work);
-    SUPERLU_FREE (iwork);
-    return;
-} /* sgscon */
diff --git a/superlu/sgsequ.c b/superlu/sgsequ.c
deleted file mode 100644
index b24e147f..00000000
--- a/superlu/sgsequ.c
+++ /dev/null
@@ -1,205 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  sgsequ.c
- * History:     Modified from LAPACK routine SGEEQU
- */
-#include <math.h>
-#include "slu_sdefs.h"
-sgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
-       float *colcnd, float *amax, int *info)
-    Purpose   
-    =======   
-    SGSEQU computes row and column scalings intended to equilibrate an   
-    M-by-N sparse matrix A and reduce its condition number. R returns the row
-    scale factors and C the column scale factors, chosen to try to make   
-    the largest element in each row and column of the matrix B with   
-    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
-    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
-    number and BIGNUM = largest safe number.  Use of these scaling   
-    factors is not guaranteed to reduce the condition number of A but   
-    works well in practice.   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    A       (input) SuperMatrix*
-            The matrix of dimension (A->nrow, A->ncol) whose equilibration
-            factors are to be computed. The type of A can be:
-            Stype = SLU_NC; Dtype = SLU_S; Mtype = SLU_GE.
-    R       (output) float*, size A->nrow
-            If INFO = 0 or INFO > M, R contains the row scale factors   
-            for A.
-    C       (output) float*, size A->ncol
-            If INFO = 0,  C contains the column scale factors for A.
-    ROWCND  (output) float*
-            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
-            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
-            AMAX is neither too large nor too small, it is not worth   
-            scaling by R.
-    COLCND  (output) float*
-            If INFO = 0, COLCND contains the ratio of the smallest   
-            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
-            worth scaling by C.
-    AMAX    (output) float*
-            Absolute value of largest matrix element.  If AMAX is very   
-            close to overflow or very close to underflow, the matrix   
-            should be scaled.
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-            > 0:  if INFO = i,  and i is   
-                  <= A->nrow:  the i-th row of A is exactly zero   
-                  >  A->ncol:  the (i-M)-th column of A is exactly zero   
-    ===================================================================== 
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    int i, j, irow;
-    float rcmin, rcmax;
-    float bignum, smlnum;
-    extern double slamch_(char *);
-    /* Test the input parameters. */
-    *info = 0;
-    if ( A->nrow < 0 || A->ncol < 0 ||
-        A->Stype != SLU_NC || A->Dtype != SLU_S || A->Mtype != SLU_GE )
-       *info = -1;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("sgsequ", &i);
-       return;
-    }
-    /* Quick return if possible */
-    if ( A->nrow == 0 || A->ncol == 0 ) {
-       *rowcnd = 1.;
-       *colcnd = 1.;
-       *amax = 0.;
-       return;
-    }
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Get machine constants. */
-    smlnum = slamch_("S");
-    bignum = 1. / smlnum;
-    /* Compute row scale factors. */
-    for (i = 0; i < A->nrow; ++i) r[i] = 0.;
-    /* Find the maximum element in each row. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           r[irow] = SUPERLU_MAX( r[irow], fabs(Aval[i]) );
-       }
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (i = 0; i < A->nrow; ++i) {
-       rcmax = SUPERLU_MAX(rcmax, r[i]);
-       rcmin = SUPERLU_MIN(rcmin, r[i]);
-    }
-    *amax = rcmax;
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (i = 0; i < A->nrow; ++i)
-           if (r[i] == 0.) {
-               *info = i + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (i = 0; i < A->nrow; ++i)
-           r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
-       /* Compute ROWCND = min(R(I)) / max(R(I)) */
-       *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-    /* Compute column scale factors */
-    for (j = 0; j < A->ncol; ++j) c[j] = 0.;
-    /* Find the maximum element in each column, assuming the row
-       scalings computed above. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           c[j] = SUPERLU_MAX( c[j], fabs(Aval[i]) * r[irow] );
-       }
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (j = 0; j < A->ncol; ++j) {
-       rcmax = SUPERLU_MAX(rcmax, c[j]);
-       rcmin = SUPERLU_MIN(rcmin, c[j]);
-    }
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (j = 0; j < A->ncol; ++j)
-           if ( c[j] == 0. ) {
-               *info = A->nrow + j + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (j = 0; j < A->ncol; ++j)
-           c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
-       /* Compute COLCND = min(C(J)) / max(C(J)) */
-       *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-    return;
-} /* sgsequ */
diff --git a/superlu/sgsrfs.c b/superlu/sgsrfs.c
deleted file mode 100644
index c95c4433..00000000
--- a/superlu/sgsrfs.c
+++ /dev/null
@@ -1,446 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:  sgsrfs.c
- * History:     Modified from lapack routine SGERFS
- */
-#include <math.h>
-#include "slu_sdefs.h"
-sgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
-       int *perm_c, int *perm_r, char *equed, float *R, float *C,
-       SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
-       SuperLUStat_t *stat, int *info)
- *   Purpose   
- *   =======   
- *
- *   SGSRFS improves the computed solution to a system of linear   
- *   equations and provides error bounds and backward error estimates for 
- *   the solution.   
- *
- *   If equilibration was performed, the system becomes:
- *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- *   Arguments   
- *   =========   
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *   
- *   A       (input) SuperMatrix*
- *           The original matrix A in the system, or the scaled A if
- *           equilibration was done. The type of A can be:
- *           Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_GE.
- *    
- *   L       (input) SuperMatrix*
- *          The factor L from the factorization Pr*A*Pc=L*U. Use
- *           compressed row subscripts storage for supernodes, 
- *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- * 
- *   U       (input) SuperMatrix*
- *           The factor U from the factorization Pr*A*Pc=L*U as computed by
- *           sgstrf(). Use column-wise storage scheme, 
- *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- *   perm_c  (input) int*, dimension (A->ncol)
- *          Column permutation vector, which defines the 
- *           permutation matrix Pc; perm_c[i] = j means column i of A is 
- *           in position j in A*Pc.
- *
- *   perm_r  (input) int*, dimension (A->nrow)
- *           Row permutation vector, which defines the permutation matrix Pr;
- *           perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- *   equed   (input) Specifies the form of equilibration that was done.
- *           = 'N': No equilibration.
- *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *           = 'C': Column equilibration, i.e., A was postmultiplied by
- *                  diag(C).
- *           = 'B': Both row and column equilibration, i.e., A was replaced 
- *                  by diag(R)*A*diag(C).
- *
- *   R       (input) float*, dimension (A->nrow)
- *           The row scale factors for A.
- *           If equed = 'R' or 'B', A is premultiplied by diag(R).
- *           If equed = 'N' or 'C', R is not accessed.
- * 
- *   C       (input) float*, dimension (A->ncol)
- *           The column scale factors for A.
- *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
- *           If equed = 'N' or 'R', C is not accessed.
- *
- *   B       (input) SuperMatrix*
- *           B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *           The right hand side matrix B.
- *           if equed = 'R' or 'B', B is premultiplied by diag(R).
- *
- *   X       (input/output) SuperMatrix*
- *           X has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *           On entry, the solution matrix X, as computed by sgstrs().
- *           On exit, the improved solution matrix X.
- *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
- *               in order to obtain the solution to the original system.
- *
- *   FERR    (output) float*, dimension (B->ncol)   
- *           The estimated forward error bound for each solution vector   
- *           X(j) (the j-th column of the solution matrix X).   
- *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *           is an estimated upper bound for the magnitude of the largest 
- *           element in (X(j) - XTRUE) divided by the magnitude of the   
- *           largest element in X(j).  The estimate is as reliable as   
- *           the estimate for RCOND, and is almost always a slight   
- *           overestimate of the true error.
- *
- *   BERR    (output) float*, dimension (B->ncol)   
- *           The componentwise relative backward error of each solution   
- *           vector X(j) (i.e., the smallest relative change in   
- *           any element of A or B that makes X(j) an exact solution).
- *
- *   stat     (output) SuperLUStat_t*
- *            Record the statistics on runtime and floating-point operation count.
- *            See util.h for the definition of 'SuperLUStat_t'.
- *
- *   info    (output) int*   
- *           = 0:  successful exit   
- *            < 0:  if INFO = -i, the i-th argument had an illegal value   
- *
- *    Internal Parameters   
- *    ===================   
- *
- *    ITMAX is the maximum number of steps of iterative refinement.   
- *
- */  
-#define ITMAX 5
-    /* Table of constant values */
-    int    ione = 1;
-    float ndone = -1.;
-    float done = 1.;
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    SuperMatrix Bjcol;
-    DNformat *Bstore, *Xstore, *Bjcol_store;
-    float   *Bmat, *Xmat, *Bptr, *Xptr;
-    int      kase;
-    float   safe1, safe2;
-    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
-    int      ldb, ldx, nrhs;
-    float   s, xk, lstres, eps, safmin;
-    char     transc[1];
-    trans_t  transt;
-    float   *work;
-    float   *rwork;
-    int      *iwork;
-    extern double slamch_(char *);
-    extern int slacon_(int *, float *, float *, int *, float *, int *);
-#ifdef _CRAY
-    extern int SCOPY(int *, float *, int *, float *, int *);
-    extern int SSAXPY(int *, float *, float *, int *, float *, int *);
-    extern int scopy_(int *, float *, int *, float *, int *);
-    extern int saxpy_(int *, float *, float *, int *, float *, int *);
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    /* Test the input parameters */
-    *info = 0;
-    notran = (trans == NOTRANS);
-    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             A->Stype != SLU_NC || A->Dtype != SLU_S || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
-       *info = -3;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
-       *info = -4;
-    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
-        *info = -10;
-    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
-             X->Stype != SLU_DN || X->Dtype != SLU_S || X->Mtype != SLU_GE )
-       *info = -11;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("sgsrfs", &i);
-       return;
-    }
-    /* Quick return if possible */
-    if ( A->nrow == 0 || nrhs == 0) {
-       for (j = 0; j < nrhs; ++j) {
-           ferr[j] = 0.;
-           berr[j] = 0.;
-       }
-       return;
-    }
-    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-    colequ = lsame_(equed, "C") || lsame_(equed, "B");
-    /* Allocate working space */
-    work = floatMalloc(2*A->nrow);
-    rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) );
-    iwork = intMalloc(2*A->nrow);
-    if ( !work || !rwork || !iwork ) 
-        ABORT("Malloc fails for work/rwork/iwork.");
-    if ( notran ) {
-       *(unsigned char *)transc = 'N';
-        transt = TRANS;
-    } else {
-       *(unsigned char *)transc = 'T';
-       transt = NOTRANS;
-    }
-    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
-    nz     = A->ncol + 1;
-    eps    = slamch_("Epsilon");
-    safmin = slamch_("Safe minimum");
-    safe1  = nz * safmin;
-    safe2  = safe1 / eps;
-    /* Compute the number of nonzeros in each row (or column) of A */
-    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
-    if ( notran ) {
-       for (k = 0; k < A->ncol; ++k)
-           for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
-               ++iwork[Astore->rowind[i]];
-    } else {
-       for (k = 0; k < A->ncol; ++k)
-           iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
-    }  
-    /* Copy one column of RHS B into Bjcol. */
-    Bjcol.Stype = B->Stype;
-    Bjcol.Dtype = B->Dtype;
-    Bjcol.Mtype = B->Mtype;
-    Bjcol.nrow  = B->nrow;
-    Bjcol.ncol  = 1;
-    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
-    Bjcol_store = Bjcol.Store;
-    Bjcol_store->lda = ldb;
-    Bjcol_store->nzval = work; /* address aliasing */
-    /* Do for each right hand side ... */
-    for (j = 0; j < nrhs; ++j) {
-       count = 0;
-       lstres = 3.;
-       Bptr = &Bmat[j*ldb];
-       Xptr = &Xmat[j*ldx];
-       while (1) { /* Loop until stopping criterion is satisfied. */
-           /* Compute residual R = B - op(A) * X,   
-              where op(A) = A, A**T, or A**H, depending on TRANS. */
-#ifdef _CRAY
-           SCOPY(&A->nrow, Bptr, &ione, work, &ione);
-           scopy_(&A->nrow, Bptr, &ione, work, &ione);
-           sp_sgemv(transc, ndone, A, Xptr, ione, done, work, ione);
-           /* Compute componentwise relative backward error from formula 
-              max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
-              where abs(Z) is the componentwise absolute value of the matrix
-              or vector Z.  If the i-th component of the denominator is less
-              than SAFE2, then SAFE1 is added to the i-th component of the   
-              numerator and denominator before dividing. */
-           for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
-           /* Compute abs(op(A))*abs(X) + abs(B). */
-           if (notran) {
-               for (k = 0; k < A->ncol; ++k) {
-                   xk = fabs( Xptr[k] );
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                       rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
-               }
-           } else {
-               for (k = 0; k < A->ncol; ++k) {
-                   s = 0.;
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       s += fabs(Aval[i]) * fabs(Xptr[irow]);
-                   }
-                   rwork[k] += s;
-               }
-           }
-           s = 0.;
-           for (i = 0; i < A->nrow; ++i) {
-               if (rwork[i] > safe2)
-                   s = SUPERLU_MAX( s, fabs(work[i]) / rwork[i] );
-               else
-                   s = SUPERLU_MAX( s, (fabs(work[i]) + safe1) / 
-                               (rwork[i] + safe1) );
-           }
-           berr[j] = s;
-           /* Test stopping criterion. Continue iterating if   
-              1) The residual BERR(J) is larger than machine epsilon, and   
-              2) BERR(J) decreased by at least a factor of 2 during the   
-                 last iteration, and   
-              3) At most ITMAX iterations tried. */
-           if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
-               /* Update solution and try again. */
-               sgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-#ifdef _CRAY
-               SAXPY(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-               saxpy_(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-               lstres = berr[j];
-               ++count;
-           } else {
-               break;
-           }
-       } /* end while */
-       stat->RefineSteps = count;
-       /* Bound error from formula:
-          norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
-          ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
-          where   
-            norm(Z) is the magnitude of the largest component of Z   
-            inv(op(A)) is the inverse of op(A)   
-            abs(Z) is the componentwise absolute value of the matrix or
-              vector Z   
-            NZ is the maximum number of nonzeros in any row of A, plus 1   
-            EPS is machine epsilon   
-          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
-          is incremented by SAFE1 if the i-th component of   
-          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   
-          Use SLACON to estimate the infinity-norm of the matrix   
-             inv(op(A)) * diag(W),   
-          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
-       for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
-       /* Compute abs(op(A))*abs(X) + abs(B). */
-       if ( notran ) {
-           for (k = 0; k < A->ncol; ++k) {
-               xk = fabs( Xptr[k] );
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                   rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
-           }
-       } else {
-           for (k = 0; k < A->ncol; ++k) {
-               s = 0.;
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   xk = fabs( Xptr[irow] );
-                   s += fabs(Aval[i]) * xk;
-               }
-               rwork[k] += s;
-           }
-       }
-       for (i = 0; i < A->nrow; ++i)
-           if (rwork[i] > safe2)
-               rwork[i] = fabs(work[i]) + (iwork[i]+1)*eps*rwork[i];
-           else
-               rwork[i] = fabs(work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
-       kase = 0;
-       do {
-           slacon_(&A->nrow, &work[A->nrow], work,
-                   &iwork[A->nrow], &ferr[j], &kase);
-           if (kase == 0) break;
-           if (kase == 1) {
-               /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->nrow; ++i) work[i] *= R[i];
-               sgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
-           } else {
-               /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
-               for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
-               sgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= R[i];  
-           }
-       } while ( kase != 0 );
-       /* Normalize error. */
-       lstres = 0.;
-       if ( notran && colequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, C[i] * fabs( Xptr[i]) );
-       } else if ( !notran && rowequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, R[i] * fabs( Xptr[i]) );
-       } else {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, fabs( Xptr[i]) );
-       }
-       if ( lstres != 0. )
-           ferr[j] /= lstres;
-    } /* for each RHS j ... */
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(rwork);
-    SUPERLU_FREE(iwork);
-    SUPERLU_FREE(Bjcol.Store);
-    return;
-} /* sgsrfs */
diff --git a/superlu/sgssv.c b/superlu/sgssv.c
deleted file mode 100644
index 3c657806..00000000
--- a/superlu/sgssv.c
+++ /dev/null
@@ -1,230 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_sdefs.h"
-sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-      SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
-      SuperLUStat_t *stat, int *info )
- * Purpose
- * =======
- *
- * SGSSV solves the system of linear equations A*X=B, using the
- * LU factorization from SGSTRF. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *
- *      1.1. Permute the columns of A, forming A*Pc, where Pc
- *           is a permutation matrix. For more details of this step, 
- *           see sp_preorder.c.
- *
- *      1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
- *           by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      1.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the
- *      above algorithm to the transpose of A:
- *
- *      2.1. Permute columns of transpose(A) (rows of A),
- *           forming transpose(A)*Pc, where Pc is a permutation matrix. 
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
- *           determined by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      2.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- * 
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR; Dtype = SLU_S; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *         If A->Stype = SLU_NC, column permutation vector of size A->ncol
- *         which defines the permutation matrix Pc; perm_c[i] = j means 
- *         column i of A is in position j in A*Pc.
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- *         If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
- *            options->Fact = SamePattern_SameRowPerm, it is an input argument.
- *            On exit, perm_c may be overwritten by the product of the input
- *            perm_c and a permutation that postorders the elimination tree
- *            of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *            is already in postorder.
- *         Otherwise, it is an output argument.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined 
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->RowPerm = MY_PERMR or
- *            options->Fact = SamePattern_SameRowPerm, perm_r is an
- *            input argument.
- *         otherwise it is an output argument.
- *
- * L       (output) SuperMatrix*
- *         The factor L from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *         
- * U       (output) SuperMatrix*
- *        The factor U from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *         > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                so the solution could not be computed.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol.
- *   
- */
-    DNformat *Bstore;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int      lwork = 0, *etree, i;
-    /* Set default values for some parameters */
-    float   drop_tol = 0.;
-    int      panel_size;     /* panel size */
-    int      relax;          /* no of columns in a relaxed snodes */
-    int      permc_spec;
-    trans_t  trans = NOTRANS;
-    double   *utime;
-    double   t;        /* Temporary time */
-    /* Test the input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    if ( options->Fact != DOFACT ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-        (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-        A->Dtype != SLU_S || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-       B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
-       *info = -7;
-    if ( *info != 0 ) {
-       i = -(*info);
-       xerbla_("sgssv", &i);
-       return;
-    }
-    utime = stat->utime;
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       trans = TRANS;
-    } else {
-        if ( A->Stype == SLU_NC ) AA = A;
-    }
-    t = SuperLU_timer_();
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = NATURAL:  natural ordering 
-     *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-     *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-     *   permc_spec = COLAMD:   approximate minimum degree column ordering
-     *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-     */
-    permc_spec = options->ColPerm;
-    if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-      get_perm_c(permc_spec, AA, perm_c);
-    utime[COLPERM] = SuperLU_timer_() - t;
-    etree = intMalloc(A->ncol);
-    t = SuperLU_timer_();
-    sp_preorder(options, AA, perm_c, etree, &AC);
-    utime[ETREE] = SuperLU_timer_() - t;
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-    /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-         relax, panel_size, sp_ienv(3), sp_ienv(4));*/
-    t = SuperLU_timer_(); 
-    /* Compute the LU factorization of A. */
-    sgstrf(options, &AC, drop_tol, relax, panel_size,
-          etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
-    utime[FACT] = SuperLU_timer_() - t;
-    t = SuperLU_timer_();
-    if ( *info == 0 ) {
-        /* Solve the system A*X=B, overwriting B with X. */
-        sgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
-    }
-    utime[SOLVE] = SuperLU_timer_() - t;
-    SUPERLU_FREE (etree);
-    Destroy_CompCol_Permuted(&AC);
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-    }
diff --git a/superlu/sgssvx.c b/superlu/sgssvx.c
deleted file mode 100644
index c8392abe..00000000
--- a/superlu/sgssvx.c
+++ /dev/null
@@ -1,623 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_sdefs.h"
-sgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-       int *etree, char *equed, float *R, float *C,
-       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
-       SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, 
-       float *rcond, float *ferr, float *berr, 
-       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
- * Purpose
- * =======
- *
- * SGSSVX solves the system of linear equations A*X=B or A'*X=B, using
- * the LU factorization from sgstrf(). Error bounds on the solution and
- * a condition estimate are also provided. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *  
- *      1.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A is
- *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
- *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
- *           = TRANS or CONJ).
- *
- *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
- *           matrix that usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the matrix A (after equilibration if options->Equil = YES)
- *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
- *
- *      1.4. Compute the reciprocal pivot growth factor.
- *
- *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form of 
- *           A is used to estimate the condition number of the matrix A. If
- *           the reciprocal of the condition number is less than machine
- *           precision, info = A->ncol+1 is returned as a warning, but the
- *           routine still goes on to solve for X and computes error bounds
- *           as described below.
- *
- *      1.6. The system of equations is solved for X using the factored form
- *           of A.
- *
- *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      1.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R)
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
- *      to the transpose of A:
- *
- *      2.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A' is
- *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
- *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
- *
- *      2.2. Permute columns of transpose(A) (rows of A), 
- *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
- *           usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the transpose(A) (after equilibration if 
- *           options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
- *           permutation Pr determined by partial pivoting.
- *
- *      2.4. Compute the reciprocal pivot growth factor.
- *
- *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form 
- *           of transpose(A) is used to estimate the condition number of the
- *           matrix A. If the reciprocal of the condition number
- *           is less than machine precision, info = A->nrow+1 is returned as
- *           a warning, but the routine still goes on to solve for X and
- *           computes error bounds as described below.
- *
- *      2.6. The system of equations is solved for X using the factored form
- *           of transpose(A).
- *
- *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      2.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input/output) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- *         On entry, If options->Fact = FACTORED and equed is not 'N', 
- *         then A must have been equilibrated by the scaling factors in
- *         R and/or C.  
- *         On exit, A is not modified if options->Equil = NO, or if 
- *         options->Equil = YES but equed = 'N' on exit.
- *         Otherwise, if options->Equil = YES and equed is not 'N',
- *         A is scaled as follows:
- *         If A->Stype = SLU_NC:
- *           equed = 'R':  A := diag(R) * A
- *           equed = 'C':  A := A * diag(C)
- *           equed = 'B':  A := diag(R) * A * diag(C).
- *         If A->Stype = SLU_NR:
- *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
- *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
- *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
- *
- * perm_c  (input/output) int*
- *        If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
- *         which defines the permutation matrix Pc; perm_c[i] = j means
- *         column i of A is in position j in A*Pc.
- *         On exit, perm_c may be overwritten by the product of the input
- *         perm_c and a permutation that postorders the elimination tree
- *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *         is already in postorder.
- *
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *         will try to use the input perm_r, unless a certain threshold
- *         criterion is violated. In that case, perm_r is overwritten by a
- *         new permutation determined by partial pivoting or diagonal
- *         threshold pivoting.
- *         Otherwise, perm_r is output argument.
- * 
- * etree   (input/output) int*,  dimension (A->ncol)
- *         Elimination tree of Pc'*A'*A*Pc.
- *         If options->Fact != FACTORED and options->Fact != DOFACT,
- *         etree is an input argument, otherwise it is an output argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * equed   (input/output) char*
- *         Specifies the form of equilibration that was done.
- *         = 'N': No equilibration.
- *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
- *         = 'B': Both row and column equilibration, i.e., A was replaced 
- *                by diag(R)*A*diag(C).
- *         If options->Fact = FACTORED, equed is an input argument,
- *         otherwise it is an output argument.
- *
- * R       (input/output) float*, dimension (A->nrow)
- *         The row scale factors for A or transpose(A).
- *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
- *         If equed = 'N' or 'C', R is not accessed.
- *         If options->Fact = FACTORED, R is an input argument,
- *             otherwise, R is output.
- *         If options->zFact = FACTORED and equed = 'R' or 'B', each element
- *             of R must be positive.
- * 
- * C       (input/output) float*, dimension (A->ncol)
- *         The column scale factors for A or transpose(A).
- *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
- *         If equed = 'N' or 'R', C is not accessed.
- *         If options->Fact = FACTORED, C is an input argument,
- *             otherwise, C is output.
- *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
- *             of C must be positive.
- *         
- * L       (output) SuperMatrix*
- *        The factor L from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype SLU_= NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *
- * U       (output) SuperMatrix*
- *        The factor U from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * work    (workspace/output) void*, size (lwork) (in bytes)
- *         User supplied workspace, should be large enough
- *         to hold data structures for factors L and U.
- *         On exit, if fact is not 'F', L and U point to this array.
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               mem_usage->total_needed; no other side effects.
- *
- *         See argument 'mem_usage' for memory usage statistics.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         If B->ncol = 0, only LU decomposition is performed, the triangular
- *                         solve is skipped.
- *         On exit,
- *            if equed = 'N', B is not modified; otherwise
- *            if A->Stype = SLU_NC:
- *               if options->Trans = NOTRANS and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
- *                  B is overwritten by diag(C)*B;
- *            if A->Stype = SLU_NR:
- *               if options->Trans = NOTRANS and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
- *                  B is overwritten by diag(R)*B.
- *
- * X       (output) SuperMatrix*
- *         X has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE. 
- *         If info = 0 or info = A->ncol+1, X contains the solution matrix
- *         to the original system of equations. Note that A and B are modified
- *         on exit if equed is not 'N', and the solution to the equilibrated
- *         system is inv(diag(C))*X if options->Trans = NOTRANS and
- *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
- *         and equed = 'R' or 'B'.
- *
- * recip_pivot_growth (output) float*
- *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
- *         The infinity norm is used. If recip_pivot_growth is much less
- *         than 1, the stability of the LU factorization could be poor.
- *
- * rcond   (output) float*
- *         The estimate of the reciprocal condition number of the matrix A
- *         after equilibration (if done). If rcond is less than the machine
- *         precision (in particular, if rcond = 0), the matrix is singular
- *         to working precision. This condition is indicated by a return
- *         code of info > 0.
- *
- * FERR    (output) float*, dimension (B->ncol)   
- *         The estimated forward error bound for each solution vector   
- *         X(j) (the j-th column of the solution matrix X).   
- *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *         is an estimated upper bound for the magnitude of the largest 
- *         element in (X(j) - XTRUE) divided by the magnitude of the   
- *         largest element in X(j).  The estimate is as reliable as   
- *         the estimate for RCOND, and is almost always a slight   
- *         overestimate of the true error.
- *         If options->IterRefine = NOREFINE, ferr = 1.0.
- *
- * BERR    (output) float*, dimension (B->ncol)
- *         The componentwise relative backward error of each solution   
- *         vector X(j) (i.e., the smallest relative change in   
- *         any element of A or B that makes X(j) an exact solution).
- *         If options->IterRefine = NOREFINE, berr = 1.0.
- *
- * mem_usage (output) mem_usage_t*
- *         Record the memory usage statistics, consisting of following fields:
- *         - for_lu (float)
- *           The amount of space used in bytes for L\U data structures.
- *         - total_needed (float)
- *           The amount of space needed in bytes to perform factorization.
- *         - expansions (int)
- *           The number of memory expansions during the LU factorization.
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *         = 0: successful exit   
- *         < 0: if info = -i, the i-th argument had an illegal value   
- *         > 0: if info = i, and i is   
- *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
- *                    been completed, but the factor U is exactly   
- *                    singular, so the solution and error bounds   
- *                    could not be computed.   
- *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
- *                    precision, meaning that the matrix is singular to
- *                    working precision. Nevertheless, the solution and
- *                    error bounds are computed because there are a number
- *                    of situations where the computed solution can be more
- *                    accurate than the value of RCOND would suggest.   
- *              > A->ncol+1: number of bytes allocated when memory allocation
- *                    failure occurred, plus A->ncol.
- *
- */
-    DNformat  *Bstore, *Xstore;
-    float    *Bmat, *Xmat;
-    int       ldb, ldx, nrhs;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int       colequ, equil, nofact, notran, rowequ, permc_spec;
-    trans_t   trant;
-    char      norm[1];
-    int       i, j, info1;
-    float    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
-    int       relax, panel_size;
-    float    diag_pivot_thresh, drop_tol;
-    double    t0;      /* temporary time */
-    double    *utime;
-    /* External functions */
-    extern float slangs(char *, SuperMatrix *);
-    extern double slamch_(char *);
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    *info = 0;
-    nofact = (options->Fact != FACTORED);
-    equil = (options->Equil == YES);
-    notran = (options->Trans == NOTRANS);
-    if ( nofact ) {
-       *(unsigned char *)equed = 'N';
-       rowequ = FALSE;
-       colequ = FALSE;
-    } else {
-       rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-       colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       smlnum = slamch_("Safe minimum");
-       bignum = 1. / smlnum;
-    }
-#if 0
-printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
-       options->Fact, options->Trans, *equed);
-    /* Test the input parameters */
-    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
-       options->Fact != SamePattern_SameRowPerm &&
-       !notran && options->Trans != TRANS && options->Trans != CONJ &&
-       !equil && options->Equil != NO)
-       *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-             A->Dtype != SLU_S || A->Mtype != SLU_GE )
-       *info = -2;
-    else if (options->Fact == FACTORED &&
-            !(rowequ || colequ || lsame_(equed, "N")))
-       *info = -6;
-    else {
-       if (rowequ) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, R[j]);
-               rcmax = SUPERLU_MAX(rcmax, R[j]);
-           }
-           if (rcmin <= 0.) *info = -7;
-           else if ( A->nrow > 0)
-               rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else rowcnd = 1.;
-       }
-       if (colequ && *info == 0) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, C[j]);
-               rcmax = SUPERLU_MAX(rcmax, C[j]);
-           }
-           if (rcmin <= 0.) *info = -8;
-           else if (A->nrow > 0)
-               colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else colcnd = 1.;
-       }
-       if (*info == 0) {
-           if ( lwork < -1 ) *info = -12;
-           else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     B->Stype != SLU_DN || B->Dtype != SLU_S || 
-                     B->Mtype != SLU_GE )
-               *info = -13;
-           else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     (B->ncol != 0 && B->ncol != X->ncol) ||
-                      X->Stype != SLU_DN ||
-                     X->Dtype != SLU_S || X->Mtype != SLU_GE )
-               *info = -14;
-       }
-    }
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("sgssvx", &i);
-       return;
-    }
-    /* Initialization for factor parameters */
-    panel_size = sp_ienv(1);
-    relax      = sp_ienv(2);
-    diag_pivot_thresh = options->DiagPivotThresh;
-    drop_tol   = 0.0;
-    utime = stat->utime;
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       if ( notran ) { /* Reverse the transpose argument. */
-           trant = TRANS;
-           notran = 0;
-       } else {
-           trant = NOTRANS;
-           notran = 1;
-       }
-    } else { /* A->Stype == SLU_NC */
-       trant = options->Trans;
-       AA = A;
-    }
-    if ( nofact && equil ) {
-       t0 = SuperLU_timer_();
-       /* Compute row and column scalings to equilibrate the matrix A. */
-       sgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
-       if ( info1 == 0 ) {
-           /* Equilibrate matrix A. */
-           slaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
-           rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-           colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       }
-       utime[EQUIL] = SuperLU_timer_() - t0;
-    }
-    if ( nrhs > 0 ) {
-        /* Scale the right hand side if equilibration was performed. */
-        if ( notran ) {
-           if ( rowequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                       Bmat[i + j*ldb] *= R[i];
-                   }
-           }
-        } else if ( colequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                   Bmat[i + j*ldb] *= C[i];
-               }
-        }
-    }
-    if ( nofact ) {
-        t0 = SuperLU_timer_();
-       /*
-        * Gnet column permutation vector perm_c[], according to permc_spec:
-        *   permc_spec = NATURAL:  natural ordering 
-        *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-        *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-        *   permc_spec = COLAMD:   approximate minimum degree column ordering
-        *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-        */
-       permc_spec = options->ColPerm;
-       if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-            get_perm_c(permc_spec, AA, perm_c);
-       utime[COLPERM] = SuperLU_timer_() - t0;
-       t0 = SuperLU_timer_();
-       sp_preorder(options, AA, perm_c, etree, &AC);
-       utime[ETREE] = SuperLU_timer_() - t0;
-/*     printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-              relax, panel_size, sp_ienv(3), sp_ienv(4));
-       fflush(stdout); */
-       /* Compute the LU factorization of A*Pc. */
-       t0 = SuperLU_timer_();
-       sgstrf(options, &AC, drop_tol, relax, panel_size,
-              etree, work, lwork, perm_c, perm_r, L, U, stat, info);
-       utime[FACT] = SuperLU_timer_() - t0;
-       if ( lwork == -1 ) {
-           mem_usage->total_needed = *info - A->ncol;
-           return;
-       }
-    }
-    if ( options->PivotGrowth ) {
-        if ( *info > 0 ) {
-           if ( *info <= A->ncol ) {
-               /* Compute the reciprocal pivot growth factor of the leading
-                  rank-deficient *info columns of A. */
-               *recip_pivot_growth = sPivotGrowth(*info, AA, perm_c, L, U);
-           }
-           return;
-        }
-        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
-        *recip_pivot_growth = sPivotGrowth(A->ncol, AA, perm_c, L, U);
-    }
-    if ( options->ConditionNumber ) {
-        /* Estimate the reciprocal of the condition number of A. */
-        t0 = SuperLU_timer_();
-        if ( notran ) {
-           *(unsigned char *)norm = '1';
-        } else {
-           *(unsigned char *)norm = 'I';
-        }
-        anorm = slangs(norm, AA);
-        sgscon(norm, L, U, anorm, rcond, stat, info);
-        utime[RCOND] = SuperLU_timer_() - t0;
-    }
-    if ( nrhs > 0 ) {
-        /* Compute the solution matrix X. */
-        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
-            for (i = 0; i < B->nrow; i++)
-               Xmat[i + j*ldx] = Bmat[i + j*ldb];
-        t0 = SuperLU_timer_();
-        sgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
-        utime[SOLVE] = SuperLU_timer_() - t0;
-        /* Use iterative refinement to improve the computed solution and 
-           error bounds and backward error estimates for it. */
-        t0 = SuperLU_timer_();
-        if ( options->IterRefine != NOREFINE ) {
-            sgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
-                   X, ferr, berr, stat, info);
-        } else {
-            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
-        }
-        utime[REFINE] = SuperLU_timer_() - t0;
-        /* Transform the solution matrix X to a solution of the original 
system. */
-        if ( notran ) {
-           if ( colequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        Xmat[i + j*ldx] *= C[i];
-                   }
-           }
-        } else if ( rowequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                   Xmat[i + j*ldx] *= R[i];
-                }
-        }
-    } /* end if nrhs > 0 */
-    if ( options->ConditionNumber ) {
-        /* Set INFO = A->ncol+1 if the matrix is singular to working 
precision. */
-        if ( *rcond < slamch_("E") ) *info = A->ncol + 1;
-    }
-    if ( nofact ) {
-        sQuerySpace(L, U, mem_usage);
-        Destroy_CompCol_Permuted(&AC);
-    }
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-    }
diff --git a/superlu/sgstrf.c b/superlu/sgstrf.c
deleted file mode 100644
index 4da42045..00000000
--- a/superlu/sgstrf.c
+++ /dev/null
@@ -1,431 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_sdefs.h"
-extern void countnz();
-extern void fixupL();
-sgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
-        int relax, int panel_size, int *etree, void *work, int lwork,
-        int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
-        SuperLUStat_t *stat, int *info)
- * Purpose
- * =======
- *
- * SGSTRF computes an LU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
- *
- * drop_tol (input) float (NOT IMPLEMENTED)
- *         Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
- * relax    (input) int
- *          To control degree of relaxing supernodes. If the number
- *          of nodes (columns) in a subtree of the elimination tree is less
- *          than relax, this subtree is considered as one supernode,
- *          regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *          A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *          Elimination tree of A'*A.
- *          Note: etree is a vector of parent pointers for a forest whose
- *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *          On input, the columns of A should be permuted so that the
- *          etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *          User-supplied work space and space for the output data structures.
- *          Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *         Column permutation vector, which defines the 
- *          permutation matrix Pc; perm_c[i] = j means column i of A is 
- *          in position j in A*Pc.
- *          When searching for diagonal, perm_c[*] is applied to the
- *          row subscripts of A, so that diagonal threshold pivoting
- *          can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *          Row permutation vector which defines the permutation matrix Pr,
- *          perm_r[i] = j means row i of A is in position j in Pr*A.
- *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *             will try to use the input perm_r, unless a certain threshold
- *             criterion is violated. In that case, perm_r is overwritten by
- *             a new permutation determined by partial pivoting or diagonal
- *             threshold pivoting.
- *          Otherwise, perm_r is output argument;
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = SLU_NC, 
- *          Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *          = 0: successful exit
- *          < 0: if info = -i, the i-th argument had an illegal value
- *          > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                and division by zero will occur if it is used to solve a
- *                system of equations.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol. If lwork = -1, it is
- *                the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays: 
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   xprune[0:n-1]: xprune[*] points to locations in subscript 
- *     vector lsub[*]. For column i, xprune[i] denotes the point where 
- *     structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
- *     to be traversed for symbolic factorization.
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been 
- *     reached when working on column j.
- *     Storage: relative to original row subscripts
- *     NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
- *           see spanel_dfs.c; marker2 is used for inner-factorization,
- *            see scolumn_dfs.c.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *      Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
- *     unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *     in topological order of the dfs. A supernode representative is the 
- *     last column of a supernode.
- *      The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
- *     supernodal representative r, repfnz[r] is the location of the first 
- *     nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *     indicates the supernode r has been explored.
- *     NOTE: There are W of them, each used for one column of a panel. 
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
- *      the panel diagonal. These are filled in during spanel_dfs(), and are
- *      used later in the inner LU factorization within the panel.
- *     panel_lsub[]/dense[] pair forms the SPA data structure.
- *     NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *                NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *     The size of this array is defined by NUM_TEMPV() in ssp_defs.h.
- *
- */
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when 
-                                  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *iwork;
-    float    *swork;
-    int              *segrep, *repfnz, *parent, *xplore;
-    int              *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
-    int              *xprune;
-    int              *marker;
-    float    *dense, *tempv;
-    int       *relax_end;
-    float    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    int       pivrow;   /* pivotal row number in the original matrix A */
-    int       nseg1;   /* no of segments in U-column above panel row jcol */
-    int       nseg;    /* no of segments in each U-column */
-    register int jcol; 
-    register int kcol; /* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;   /* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-    iinfo    = 0;
-    m        = A->nrow;
-    n        = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a        = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-    /* Allocate storage common to the factor routines */
-    *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz,
-                       panel_size, L, U, &Glu, &iwork, &swork);
-    if ( *info ) return;
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-            &repfnz, &panel_lsub, &xprune, &marker);
-    sSetRWork(m, panel_size, swork, &dense, &tempv);
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-       /* Compute the inverse of perm_r */
-       iperm_r = (int *) intMalloc(m);
-       for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-       iperm_r_allocated = 1;
-    }
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES ) {
-        heap_relax_snode(n, etree, relax, marker, relax_end); 
-    } else {
-        relax_snode(n, etree, relax, marker, relax_end); 
-    }
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-    /* 
-     * Work on one "panel" at a time. A panel is one of the following: 
-     *    (a) a relaxed supernode at the bottom of the etree, or
-     *    (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-       if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-           kcol = relax_end[jcol];       /* end of the relaxed snode */
-           panel_histo[kcol-jcol+1]++;
-           /* --------------------------------------
-            * Factorize the relaxed supernode(jcol:kcol) 
-            * -------------------------------------- */
-           /* Determine the union of the row structure of the snode */
-           if ( (*info = ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-                                   xprune, marker, &Glu)) != 0 )
-               return;
-            nextu    = xusub[jcol];
-           nextlu   = xlusup[jcol];
-           jsupno   = supno[jcol];
-           fsupc    = xsup[jsupno];
-           new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-           nzlumax = Glu.nzlumax;
-           while ( new_next > nzlumax ) {
-               if ( (*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) 
-                   return;
-           }
-           for (icol = jcol; icol<= kcol; icol++) {
-               xusub[icol+1] = nextu;
-               /* Scatter into SPA dense[*] */
-               for (k = xa_begin[icol]; k < xa_end[icol]; k++)
-                   dense[asub[k]] = a[k];
-               /* Numeric update within the snode */
-               ssnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-               if ( (*info = spivotL(icol, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-#ifdef DEBUG
-               sprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
-           }
-           jcol = icol;
-       } else { /* Work on one panel of panel_size columns */
-           /* Adjust panel_size so that a panel won't overlap with the next 
-            * relaxed snode.
-            */
-           panel_size = w_def;
-           for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
-               if ( relax_end[k] != EMPTY ) {
-                   panel_size = k - jcol;
-                   break;
-               }
-           if ( k == min_mn ) panel_size = min_mn - jcol;
-           panel_histo[panel_size]++;
-           /* symbolic factor on a panel of columns */
-           spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                     dense, panel_lsub, segrep, repfnz, xprune,
-                     marker, parent, xplore, &Glu);
-           /* numeric sup-panel updates in topological order */
-           spanel_bmod(m, panel_size, jcol, nseg1, dense,
-                       tempv, segrep, repfnz, &Glu, stat);
-           /* Sparse LU within the panel, and below panel diagonal */
-           for ( jj = jcol; jj < jcol + panel_size; jj++) {
-               k = (jj - jcol) * m; /* column index for w-wide arrays */
-               nseg = nseg1;   /* Begin after all the panel segments */
-               if ((*info = scolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
-                                       segrep, &repfnz[k], xprune, marker,
-                                       parent, xplore, &Glu)) != 0) return;
-               /* Numeric updates */
-               if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
-                                        tempv, &segrep[nseg1], &repfnz[k],
-                                        jcol, &Glu, stat)) != 0) return;
-               /* Copy the U-segments to ucol[*] */
-               if ((*info = scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-                                         perm_r, &dense[k], &Glu)) != 0)
-                   return;
-               if ( (*info = spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-               /* Prune columns (0:jj-1) using column jj */
-               spruneL(jj, perm_r, pivrow, nseg, segrep,
-                        &repfnz[k], xprune, &Glu);
-               /* Reset repfnz[] for this column */
-               resetrep_col (nseg, segrep, &repfnz[k]);
-#ifdef DEBUG
-               sprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
-           }
-           jcol += panel_size; /* Move to the next panel */
-       } /* else */
-    } /* for */
-    *info = iinfo;
-    if ( m > n ) {
-       k = 0;
-        for (i = 0; i < m; ++i) 
-            if ( perm_r[i] == EMPTY ) {
-               perm_r[i] = n + k;
-               ++k;
-           }
-    }
-    countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
-    fixupL(min_mn, perm_r, &Glu);
-    sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */
-    if ( fact == SamePattern_SameRowPerm ) {
-        /* L and U structures may have changed due to possibly different
-          pivoting, even though the storage is available.
-          There could also be memory expansions, so the array locations
-           may have changed, */
-        ((SCformat *)L->Store)->nnz = nnzL;
-       ((SCformat *)L->Store)->nsuper = Glu.supno[n];
-       ((SCformat *)L->Store)->nzval = Glu.lusup;
-       ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-       ((SCformat *)L->Store)->rowind = Glu.lsub;
-       ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-       ((NCformat *)U->Store)->nnz = nnzU;
-       ((NCformat *)U->Store)->nzval = Glu.ucol;
-       ((NCformat *)U->Store)->rowind = Glu.usub;
-       ((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-        sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, 
-                                Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-                                Glu.xsup, SLU_SC, SLU_S, SLU_TRLU);
-       sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
-                              Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU);
-    }
-    ops[FACT] += ops[TRSV] + ops[GEMV];        
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
diff --git a/superlu/sgstrs.c b/superlu/sgstrs.c
deleted file mode 100644
index 7e2c6ecf..00000000
--- a/superlu/sgstrs.c
+++ /dev/null
@@ -1,331 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_sdefs.h"
-extern void strsm_();
-extern void sgemm_();
- * Function prototypes 
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
- * Purpose
- * =======
- *
- * SGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         sgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         sgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *        Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-    int      incx = 1, incy = 1;
-    float   alpha = 1.0, beta = 1.0;
-    float   *work_col;
-    DNformat *Bstore;
-    float   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    float   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    float   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void sprint_soln();
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
-       *info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
-       *info = -6;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sgstrs", &i);
-       return;
-    }
-    n = L->nrow;
-    work = floatCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = floatMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    if ( trans == NOTRANS ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-           solve_ops += nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 2 * nrow * nsupc * nrhs;
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work_col[i]; /* Scatter */
-                       work_col[i] = 0.0;
-                       iptr++;
-                   }
-               }
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work[i];
-                       work[i] = 0.0;
-                       iptr++;
-                   }
-               }
-           } /* else ... */
-       } /* for L-solve */
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       sprint_soln(n, nrhs, Bmat);
-       /*
-        * Back solve Ux=y.
-        */
-       for (k = Lstore->nsuper; k >= 0; k--) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           luptr = L_NZ_START(fsupc);
-           solve_ops += nsupc * (nsupc + 1) * nrhs;
-           if ( nsupc == 1 ) {
-               rhs_work = &Bmat[0];
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work[fsupc] /= Lval[luptr];
-                   rhs_work += ldb;
-               }
-           } else {
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("U", strlen("U"));
-               ftcs3 = _cptofcd("N", strlen("N"));
-               STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               for (j = 0; j < nrhs; j++)
-                   susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-           }
-           for (j = 0; j < nrhs; ++j) {
-               rhs_work = &Bmat[j*ldb];
-               for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-                   solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-                       irow = U_SUB(i);
-                       rhs_work[irow] -= rhs_work[jcol] * Uval[i];
-                   }
-               }
-           }
-       } /* for U-solve */
-#ifdef DEBUG
-       printf("After U-solve: x=\n");
-       sprint_soln(n, nrhs, Bmat);
-       /* Compute the final solution X := Pc*X. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-        stat->ops[SOLVE] = solve_ops;
-    } else { /* Solve A'*X=B or CONJ(A)*X=B */
-       /* Permute right hand sides to form Pc'*B. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       stat->ops[SOLVE] = 0;
-       for (k = 0; k < nrhs; ++k) {
-           /* Multiply by inv(U'). */
-           sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-           /* Multiply by inv(L'). */
-           sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-       }
-       /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-    }
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
- * Diagnostic print of the solution vector 
- */
-sprint_soln(int n, int nrhs, float *soln)
-    int i;
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i]);
diff --git a/superlu/slacon.c b/superlu/slacon.c
deleted file mode 100644
index 27b7d40e..00000000
--- a/superlu/slacon.c
+++ /dev/null
@@ -1,249 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <math.h>
-#include "slu_Cnames.h"
-slacon_(int *n, float *v, float *x, int *isgn, float *est, int *kase)
-    Purpose   
-    =======   
-    SLACON estimates the 1-norm of a square matrix A.   
-    Reverse communication is used for evaluating matrix-vector products. 
-    Arguments   
-    =========   
-    N      (input) INT
-           The order of the matrix.  N >= 1.   
-    V      (workspace) FLOAT PRECISION array, dimension (N)   
-           On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
-           (W is not returned).   
-    X      (input/output) FLOAT PRECISION array, dimension (N)   
-           On an intermediate return, X should be overwritten by   
-                 A * X,   if KASE=1,   
-                 A' * X,  if KASE=2,
-           and SLACON must be re-called with all the other parameters   
-           unchanged.   
-    ISGN   (workspace) INT array, dimension (N)
-    EST    (output) FLOAT PRECISION   
-           An estimate (a lower bound) for norm(A).   
-    KASE   (input/output) INT
-           On the initial call to SLACON, KASE should be 0.   
-           On an intermediate return, KASE will be 1 or 2, indicating   
-           whether X should be overwritten by A * X  or A' * X.   
-           On the final return from SLACON, KASE will again be 0.   
-    Further Details   
-    ======= =======   
-    Contributed by Nick Higham, University of Manchester.   
-    Originally named CONEST, dated March 16, 1988.   
-    Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
-    a real or complex matrix, with applications to condition estimation", 
-    ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
-    ===================================================================== 
-    /* Table of constant values */
-    int c__1 = 1;
-    float      zero = 0.0;
-    float      one = 1.0;
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static float altsgn, estold;
-    static int i, j;
-    float temp;
-#ifdef _CRAY
-    extern int ISAMAX(int *, float *, int *);
-    extern float SASUM(int *, float *, int *);
-    extern int SCOPY(int *, float *, int *, float *, int *);
-    extern int isamax_(int *, float *, int *);
-    extern float sasum_(int *, float *, int *);
-    extern int scopy_(int *, float *, int *, float *, int *);
-#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a))    /* Copy sign */
-#define i_dnnt(a) \
-       ( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */
-    if ( *kase == 0 ) {
-       for (i = 0; i < *n; ++i) {
-           x[i] = 1. / (float) (*n);
-       }
-       *kase = 1;
-       jump = 1;
-       return 0;
-    }
-    switch (jump) {
-       case 1:  goto L20;
-       case 2:  goto L40;
-       case 3:  goto L70;
-       case 4:  goto L110;
-       case 5:  goto L140;
-    }
-    /*     ................ ENTRY   (JUMP = 1)   
-  L20:
-    if (*n == 1) {
-       v[0] = x[0];
-       *est = fabs(v[0]);
-       /*        ... QUIT */
-       goto L150;
-    }
-#ifdef _CRAY
-    *est = SASUM(n, x, &c__1);
-    *est = sasum_(n, x, &c__1);
-    for (i = 0; i < *n; ++i) {
-       x[i] = d_sign(one, x[i]);
-       isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 2)   
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-    j = isamax_(n, &x[0], &c__1);
-    --j;
-    iter = 2;
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 3)   
-#ifdef _CRAY
-    SCOPY(n, x, &c__1, v, &c__1);
-    scopy_(n, x, &c__1, v, &c__1);
-    estold = *est;
-#ifdef _CRAY
-    *est = SASUM(n, v, &c__1);
-    *est = sasum_(n, v, &c__1);
-    for (i = 0; i < *n; ++i)
-       if (i_dnnt(d_sign(one, x[i])) != isgn[i])
-           goto L90;
-    goto L120;
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-    for (i = 0; i < *n; ++i) {
-       x[i] = d_sign(one, x[i]);
-       isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 4)   
-    jlast = j;
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-    j = isamax_(n, &x[0], &c__1);
-    --j;
-    if (x[jlast] != fabs(x[j]) && iter < 5) {
-       ++iter;
-       goto L50;
-    }
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-       x[i-1] = altsgn * ((float)(i - 1) / (float)(*n - 1) + 1.);
-       altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 5)   
-#ifdef _CRAY
-    temp = SASUM(n, x, &c__1) / (float)(*n * 3) * 2.;
-    temp = sasum_(n, x, &c__1) / (float)(*n * 3) * 2.;
-    if (temp > *est) {
-#ifdef _CRAY
-       SCOPY(n, &x[0], &c__1, &v[0], &c__1);
-       scopy_(n, &x[0], &c__1, &v[0], &c__1);
-       *est = temp;
-    }
-    *kase = 0;
-    return 0;
-} /* slacon_ */
diff --git a/superlu/slamch.c b/superlu/slamch.c
deleted file mode 100644
index 08148e2d..00000000
--- a/superlu/slamch.c
+++ /dev/null
@@ -1,1023 +0,0 @@
-#include <stdio.h>
-#include "slu_Cnames.h"
-#define TRUE_ (1)
-#define FALSE_ (0)
-#define min(a,b) ((a) <= (b) ? (a) : (b))
-#define max(a,b) ((a) >= (b) ? (a) : (b))
-#define abs(x) ((x) >= 0 ? (x) : -(x))
-#define dabs(x) (double)abs(x)
-double slamch_(char *cmach)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Copyright (c) 1992-2013 The University of Tennessee and The University
-                        of Tennessee Research Foundation.  All rights
-                        reserved.
-       Copyright (c) 2000-2013 The University of California Berkeley. All
-                        rights reserved.
-       Copyright (c) 2006-2013 The University of Colorado Denver.  All rights
-                        reserved.
-       Redistribution and use in source and binary forms, with or without
-       modification, are permitted provided that the following conditions are
-       met:
-       - Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       - Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer listed
-         in this license in the documentation and/or other materials
-         provided with the distribution.
-       - Neither the name of the copyright holders nor the names of its
-         contributors may be used to endorse or promote products derived from
-         this software without specific prior written permission.
-       The copyright holders provide no reassurances that the source code
-       provided does not infringe any patent, copyright, or any other
-       intellectual property rights of third parties.  The copyright holders
-       disclaim any liability to any recipient for claims brought against
-       recipient by any third party for infringement of that parties
-       intellectual property rights.
-    Purpose   
-    =======   
-    SLAMCH determines single precision machine parameters.   
-    Arguments   
-    =========   
-    CMACH   (input) CHARACTER*1   
-            Specifies the value to be returned by SLAMCH:   
-            = 'E' or 'e',   SLAMCH := eps   
-            = 'S' or 's ,   SLAMCH := sfmin   
-            = 'B' or 'b',   SLAMCH := base   
-            = 'P' or 'p',   SLAMCH := eps*base   
-            = 'N' or 'n',   SLAMCH := t   
-            = 'R' or 'r',   SLAMCH := rnd   
-            = 'M' or 'm',   SLAMCH := emin   
-            = 'U' or 'u',   SLAMCH := rmin   
-            = 'L' or 'l',   SLAMCH := emax   
-            = 'O' or 'o',   SLAMCH := rmax   
-            where   
-            eps   = relative machine precision   
-            sfmin = safe minimum, such that 1/sfmin does not overflow   
-            base  = base of the machine   
-            prec  = eps*base   
-            t     = number of (base) digits in the mantissa   
-            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise   
-            emin  = minimum exponent before (gradual) underflow   
-            rmin  = underflow threshold - base**(emin-1)   
-            emax  = largest exponent before overflow   
-            rmax  = overflow threshold  - (base**emax)*(1-eps)   
-   ===================================================================== 
-/* >>Start of File<<   
-       Initialized data */
-    static int first = TRUE_;
-    /* System generated locals */
-    int i__1;
-    float ret_val;
-    /* Builtin functions */
-    double pow_ri(float *, int *);
-    /* Local variables */
-    static float base;
-    static int beta;
-    static float emin, prec, emax;
-    static int imin, imax;
-    static int lrnd;
-    static float rmin, rmax, t, rmach;
-    extern int lsame_(char *, char *);
-    static float small, sfmin;
-    extern /* Subroutine */ int slamc2_(int *, int *, int *, float 
-           *, int *, float *, int *, float *);
-    static int it;
-    static float rnd, eps;
-    if (first) {
-       first = FALSE_;
-       slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
-       base = (float) beta;
-       t = (float) it;
-       if (lrnd) {
-           rnd = 1.f;
-           i__1 = 1 - it;
-           eps = pow_ri(&base, &i__1) / 2;
-       } else {
-           rnd = 0.f;
-           i__1 = 1 - it;
-           eps = pow_ri(&base, &i__1);
-       }
-       prec = eps * base;
-       emin = (float) imin;
-       emax = (float) imax;
-       sfmin = rmin;
-       small = 1.f / rmax;
-       if (small >= sfmin) {
-/*           Use SMALL plus a bit, to avoid the possibility of rou
-             causing overflow when computing  1/sfmin. */
-           sfmin = small * (eps + 1.f);
-       }
-    }
-    if (lsame_(cmach, "E")) {
-       rmach = eps;
-    } else if (lsame_(cmach, "S")) {
-       rmach = sfmin;
-    } else if (lsame_(cmach, "B")) {
-       rmach = base;
-    } else if (lsame_(cmach, "P")) {
-       rmach = prec;
-    } else if (lsame_(cmach, "N")) {
-       rmach = t;
-    } else if (lsame_(cmach, "R")) {
-       rmach = rnd;
-    } else if (lsame_(cmach, "M")) {
-       rmach = emin;
-    } else if (lsame_(cmach, "U")) {
-       rmach = rmin;
-    } else if (lsame_(cmach, "L")) {
-       rmach = emax;
-    } else if (lsame_(cmach, "O")) {
-       rmach = rmax;
-    }
-    ret_val = rmach;
-    return ret_val;
-/*     End of SLAMCH */
-} /* slamch_ */
-/* Subroutine */ int slamc1_(int *beta, int *t, int *rnd, int 
-       *ieee1)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    SLAMC1 determines the machine parameters given by BETA, T, RND, and   
-    IEEE1.   
-    Arguments   
-    =========   
-    BETA    (output) INT   
-            The base of the machine.   
-    T       (output) INT   
-            The number of ( BETA ) digits in the mantissa.   
-    RND     (output) INT   
-            Specifies whether proper rounding  ( RND = .TRUE. )  or   
-            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
-            be a reliable guide to the way in which the machine performs 
-            its arithmetic.   
-    IEEE1   (output) INT   
-            Specifies whether rounding appears to be done in the IEEE   
-            'round to nearest' style.   
-    Further Details   
-    ===============   
-    The routine is based on the routine  ENVRON  by Malcolm and   
-    incorporates suggestions by Gentleman and Marovich. See   
-       Malcolm M. A. (1972) Algorithms to reveal properties of   
-          floating-point arithmetic. Comms. of the ACM, 15, 949-951.   
-       Gentleman W. M. and Marovich S. B. (1974) More on algorithms   
-          that reveal properties of floating point arithmetic units.   
-          Comms. of the ACM, 17, 276-277.   
-   ===================================================================== 
-    /* Initialized data */
-    static int first = TRUE_;
-    /* System generated locals */
-    float r__1, r__2;
-    /* Local variables */
-    static int lrnd;
-    static float a, b, c, f;
-    static int lbeta;
-    static float savec;
-    static int lieee1;
-    static float t1, t2;
-    extern double slamc3_(float *, float *);
-    static int lt;
-    static float one, qtr;
-    if (first) {
-       first = FALSE_;
-       one = 1.f;
-/*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BE
-          IEEE1, T and RND.   
-          Throughout this routine  we use the function  SLAMC3  to ens
-          that relevant values are  stored and not held in registers, 
- or   
-          are not affected by optimizers.   
-          Compute  a = 2.0**m  with the  smallest positive integer m s
-          that   
-             fl( a + 1.0 ) = a. */
-       a = 1.f;
-       c = 1.f;
-/* +       WHILE( C.EQ.ONE )LOOP */
-       if (c == one) {
-           a *= 2;
-           c = slamc3_(&a, &one);
-           r__1 = -(double)a;
-           c = slamc3_(&c, &r__1);
-           goto L10;
-       }
-/* +       END WHILE   
-          Now compute  b = 2.0**m  with the smallest positive integer 
-          such that   
-             fl( a + b ) .gt. a. */
-       b = 1.f;
-       c = slamc3_(&a, &b);
-/* +       WHILE( C.EQ.A )LOOP */
-       if (c == a) {
-           b *= 2;
-           c = slamc3_(&a, &b);
-           goto L20;
-       }
-/* +       END WHILE   
-          Now compute the base.  a and c  are neighbouring floating po
-          numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and
- so   
-          their difference is beta. Adding 0.25 to c is to ensure that
- it   
-          is truncated to beta and not ( beta - 1 ). */
-       qtr = one / 4;
-       savec = c;
-       r__1 = -(double)a;
-       c = slamc3_(&c, &r__1);
-       lbeta = c + qtr;
-/*        Now determine whether rounding or chopping occurs,  by addin
-g a   
-          bit  less  than  beta/2  and a  bit  more  than  beta/2  to 
- a. */
-       b = (float) lbeta;
-       r__1 = b / 2;
-       r__2 = -(double)b / 100;
-       f = slamc3_(&r__1, &r__2);
-       c = slamc3_(&f, &a);
-       if (c == a) {
-           lrnd = TRUE_;
-       } else {
-           lrnd = FALSE_;
-       }
-       r__1 = b / 2;
-       r__2 = b / 100;
-       f = slamc3_(&r__1, &r__2);
-       c = slamc3_(&f, &a);
-       if (lrnd && c == a) {
-           lrnd = FALSE_;
-       }
-/*        Try and decide whether rounding is done in the  IEEE  'round
- to   
-          nearest' style. B/2 is half a unit in the last place of the 
-          numbers A and SAVEC. Furthermore, A is even, i.e. has last  
-          zero, and SAVEC is odd. Thus adding B/2 to A should not  cha
-          A, but adding B/2 to SAVEC should change SAVEC. */
-       r__1 = b / 2;
-       t1 = slamc3_(&r__1, &a);
-       r__1 = b / 2;
-       t2 = slamc3_(&r__1, &savec);
-       lieee1 = t1 == a && t2 > savec && lrnd;
-/*        Now find  the  mantissa, t.  It should  be the  integer part
- of   
-          log to the base beta of a,  however it is safer to determine
-  t   
-          by powering.  So we find t as the smallest positive integer 
-          which   
-             fl( beta**t + 1.0 ) = 1.0. */
-       lt = 0;
-       a = 1.f;
-       c = 1.f;
-/* +       WHILE( C.EQ.ONE )LOOP */
-       if (c == one) {
-           ++lt;
-           a *= lbeta;
-           c = slamc3_(&a, &one);
-           r__1 = -(double)a;
-           c = slamc3_(&c, &r__1);
-           goto L30;
-       }
-/* +       END WHILE */
-    }
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *ieee1 = lieee1;
-    return 0;
-/*     End of SLAMC1 */
-} /* slamc1_ */
-/* Subroutine */ int slamc2_(int *beta, int *t, int *rnd, float *
-       eps, int *emin, float *rmin, int *emax, float *rmax)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    SLAMC2 determines the machine parameters specified in its argument   
-    list.   
-    Arguments   
-    =========   
-    BETA    (output) INT   
-            The base of the machine.   
-    T       (output) INT   
-            The number of ( BETA ) digits in the mantissa.   
-    RND     (output) INT   
-            Specifies whether proper rounding  ( RND = .TRUE. )  or   
-            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
-            be a reliable guide to the way in which the machine performs 
-            its arithmetic.   
-    EPS     (output) FLOAT   
-            The smallest positive number such that   
-               fl( 1.0 - EPS ) .LT. 1.0,   
-            where fl denotes the computed value.   
-    EMIN    (output) INT   
-            The minimum exponent before (gradual) underflow occurs.   
-    RMIN    (output) FLOAT   
-            The smallest normalized number for the machine, given by   
-            BASE**( EMIN - 1 ), where  BASE  is the floating point value 
-            of BETA.   
-    EMAX    (output) INT   
-            The maximum exponent before overflow occurs.   
-    RMAX    (output) FLOAT   
-            The largest positive number for the machine, given by   
-            BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point 
-            value of BETA.   
-    Further Details   
-    ===============   
-    The computation of  EPS  is based on a routine PARANOIA by   
-    W. Kahan of the University of California at Berkeley.   
-   ===================================================================== 
-    /* Table of constant values */
-    static int c__1 = 1;
-    /* Initialized data */
-    static int first = TRUE_;
-    static int iwarn = FALSE_;
-    /* System generated locals */
-    int i__1;
-    float r__1, r__2, r__3, r__4, r__5;
-    /* Builtin functions */
-    double pow_ri(float *, int *);
-    /* Local variables */
-    static int ieee;
-    static float half;
-    static int lrnd;
-    static float leps, zero, a, b, c;
-    static int i, lbeta;
-    static float rbase;
-    static int lemin, lemax, gnmin;
-    static float small;
-    static int gpmin;
-    static float third, lrmin, lrmax, sixth;
-    static int lieee1;
-    extern /* Subroutine */ int slamc1_(int *, int *, int *, 
-           int *);
-    extern double slamc3_(float *, float *);
-    extern /* Subroutine */ int slamc4_(int *, float *, int *), 
-           slamc5_(int *, int *, int *, int *, int *, 
-           float *);
-    static int lt, ngnmin, ngpmin;
-    static float one, two;
-    if (first) {
-       first = FALSE_;
-       zero = 0.f;
-       one = 1.f;
-       two = 2.f;
-/*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values
- of   
-          BETA, T, RND, EPS, EMIN and RMIN.   
-          Throughout this routine  we use the function  SLAMC3  to ens
-          that relevant values are stored  and not held in registers, 
- or   
-          are not affected by optimizers.   
-          SLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1. 
-       slamc1_(&lbeta, &lt, &lrnd, &lieee1);
-/*        Start to find EPS. */
-       b = (float) lbeta;
-       i__1 = -lt;
-       a = pow_ri(&b, &i__1);
-       leps = a;
-/*        Try some tricks to see whether or not this is the correct  E
-PS. */
-       b = two / 3;
-       half = one / 2;
-       r__1 = -(double)half;
-       sixth = slamc3_(&b, &r__1);
-       third = slamc3_(&sixth, &sixth);
-       r__1 = -(double)half;
-       b = slamc3_(&third, &r__1);
-       b = slamc3_(&b, &sixth);
-       b = dabs(b);
-       if (b < leps) {
-           b = leps;
-       }
-       leps = 1.f;
-/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
-       if (leps > b && b > zero) {
-           leps = b;
-           r__1 = half * leps;
-/* Computing 5th power */
-           r__3 = two, r__4 = r__3, r__3 *= r__3;
-/* Computing 2nd power */
-           r__5 = leps;
-           r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5);
-           c = slamc3_(&r__1, &r__2);
-           r__1 = -(double)c;
-           c = slamc3_(&half, &r__1);
-           b = slamc3_(&half, &c);
-           r__1 = -(double)b;
-           c = slamc3_(&half, &r__1);
-           b = slamc3_(&half, &c);
-           goto L10;
-       }
-/* +       END WHILE */
-       if (a < leps) {
-           leps = a;
-       }
-/*        Computation of EPS complete.   
-          Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3
-          Keep dividing  A by BETA until (gradual) underflow occurs. T
-          is detected when we cannot recover the previous A. */
-       rbase = one / lbeta;
-       small = one;
-       for (i = 1; i <= 3; ++i) {
-           r__1 = small * rbase;
-           small = slamc3_(&r__1, &zero);
-/* L20: */
-       }
-       a = slamc3_(&one, &small);
-       slamc4_(&ngpmin, &one, &lbeta);
-       r__1 = -(double)one;
-       slamc4_(&ngnmin, &r__1, &lbeta);
-       slamc4_(&gpmin, &a, &lbeta);
-       r__1 = -(double)a;
-       slamc4_(&gnmin, &r__1, &lbeta);
-       ieee = FALSE_;
-       if (ngpmin == ngnmin && gpmin == gnmin) {
-           if (ngpmin == gpmin) {
-               lemin = ngpmin;
-/*            ( Non twos-complement machines, no gradual under
-                e.g.,  VAX ) */
-           } else if (gpmin - ngpmin == 3) {
-               lemin = ngpmin - 1 + lt;
-               ieee = TRUE_;
-/*            ( Non twos-complement machines, with gradual und
-                e.g., IEEE standard followers ) */
-           } else {
-               lemin = min(ngpmin,gpmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-       } else if (ngpmin == gpmin && ngnmin == gnmin) {
-           if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
-               lemin = max(ngpmin,ngnmin);
-/*            ( Twos-complement machines, no gradual underflow
-                e.g., CYBER 205 ) */
-           } else {
-               lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-       } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
-                {
-           if (gpmin - min(ngpmin,ngnmin) == 3) {
-               lemin = max(ngpmin,ngnmin) - 1 + lt;
-/*            ( Twos-complement machines with gradual underflo
-                no known machine ) */
-           } else {
-               lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-       } else {
-/* Computing MIN */
-           i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
-           lemin = min(i__1,gnmin);
-/*         ( A guess; no known machine ) */
-           iwarn = TRUE_;
-       }
-/* **   
-   Comment out this if block if EMIN is ok */
-       if (iwarn) {
-           first = TRUE_;
-           printf("\n\n WARNING. The value EMIN may be incorrect:- ");
-           printf("EMIN = %8i\n",lemin);
-           printf("If, after inspection, the value EMIN looks acceptable");
-            printf("please comment out \n the IF block as marked within the"); 
-            printf("code of routine SLAMC2, \n otherwise supply EMIN"); 
-            printf("explicitly.\n");
-       }
-/* **   
-          Assume IEEE arithmetic if we found denormalised  numbers abo
-          or if arithmetic seems to round in the  IEEE style,  determi
-          in routine SLAMC1. A true IEEE machine should have both  thi
-          true; however, faulty machines may have one or the other. */
-       ieee = ieee || lieee1;
-/*        Compute  RMIN by successive division by  BETA. We could comp
-          RMIN as BASE**( EMIN - 1 ),  but some machines underflow dur
-          this computation. */
-       lrmin = 1.f;
-       i__1 = 1 - lemin;
-       for (i = 1; i <= 1-lemin; ++i) {
-           r__1 = lrmin * rbase;
-           lrmin = slamc3_(&r__1, &zero);
-/* L30: */
-       }
-/*        Finally, call SLAMC5 to compute EMAX and RMAX. */
-       slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
-    }
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *eps = leps;
-    *emin = lemin;
-    *rmin = lrmin;
-    *emax = lemax;
-    *rmax = lrmax;
-    return 0;
-/*     End of SLAMC2 */
-} /* slamc2_ */
-double slamc3_(float *a, float *b)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    SLAMC3  is intended to force  A  and  B  to be stored prior to doing 
-    the addition of  A  and  B ,  for use in situations where optimizers 
-    might hold one of these in a register.   
-    Arguments   
-    =========   
-    A, B    (input) FLOAT   
-            The values A and B.   
-   ===================================================================== 
-/* >>Start of File<<   
-       System generated locals */
-  volatile float ret_val; /* [added volatile to avoid -O3 optimizations.. (julien pommier)] */
-    ret_val = *a + *b;
-    return ret_val;
-/*     End of SLAMC3 */
-} /* slamc3_ */
-/* Subroutine */ int slamc4_(int *emin, float *start, int *base)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    SLAMC4 is a service routine for SLAMC2.   
-    Arguments   
-    =========   
-    EMIN    (output) EMIN   
-            The minimum exponent before (gradual) underflow, computed by 
-            setting A = START and dividing by BASE until the previous A   
-            can not be recovered.   
-    START   (input) FLOAT   
-            The starting point for determining EMIN.   
-    BASE    (input) INT   
-            The base of the machine.   
-   ===================================================================== 
-    /* System generated locals */
-    int i__1;
-    float r__1;
-    /* Local variables */
-    static float zero, a;
-    static int i;
-    static float rbase, b1, b2, c1, c2, d1, d2;
-    extern double slamc3_(float *, float *);
-    static float one;
-    a = *start;
-    one = 1.f;
-    rbase = one / *base;
-    zero = 0.f;
-    *emin = 1;
-    r__1 = a * rbase;
-    b1 = slamc3_(&r__1, &zero);
-    c1 = a;
-    c2 = a;
-    d1 = a;
-    d2 = a;
-/* +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.   
-      $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP */
-    if (c1 == a && c2 == a && d1 == a && d2 == a) {
-       --(*emin);
-       a = b1;
-       r__1 = a / *base;
-       b1 = slamc3_(&r__1, &zero);
-       r__1 = b1 * *base;
-       c1 = slamc3_(&r__1, &zero);
-       d1 = zero;
-       i__1 = *base;
-       for (i = 1; i <= *base; ++i) {
-           d1 += b1;
-/* L20: */
-       }
-       r__1 = a * rbase;
-       b2 = slamc3_(&r__1, &zero);
-       r__1 = b2 / rbase;
-       c2 = slamc3_(&r__1, &zero);
-       d2 = zero;
-       i__1 = *base;
-       for (i = 1; i <= *base; ++i) {
-           d2 += b2;
-/* L30: */
-       }
-       goto L10;
-    }
-/* +    END WHILE */
-    return 0;
-/*     End of SLAMC4 */
-} /* slamc4_ */
-/* Subroutine */ int slamc5_(int *beta, int *p, int *emin, 
-       int *ieee, int *emax, float *rmax)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-    Purpose   
-    =======   
-    SLAMC5 attempts to compute RMAX, the largest machine floating-point   
-    number, without overflow.  It assumes that EMAX + abs(EMIN) sum   
-    approximately to a power of 2.  It will fail on machines where this   
-    assumption does not hold, for example, the Cyber 205 (EMIN = -28625, 
-    EMAX = 28718).  It will also fail if the value supplied for EMIN is   
-    too large (i.e. too close to zero), probably with overflow.   
-    Arguments   
-    =========   
-    BETA    (input) INT   
-            The base of floating-point arithmetic.   
-    P       (input) INT   
-            The number of base BETA digits in the mantissa of a   
-            floating-point value.   
-    EMIN    (input) INT   
-            The minimum exponent before (gradual) underflow.   
-    IEEE    (input) INT   
-            A logical flag specifying whether or not the arithmetic   
-            system is thought to comply with the IEEE standard.   
-    EMAX    (output) INT   
-            The largest exponent before overflow   
-    RMAX    (output) FLOAT   
-            The largest machine floating-point number.   
-   ===================================================================== 
-       First compute LEXP and UEXP, two powers of 2 that bound   
-       abs(EMIN). We then assume that EMAX + abs(EMIN) will sum   
-       approximately to the bound that is closest to abs(EMIN).   
-       (EMAX is the exponent of the required number RMAX). */
-    /* Table of constant values */
-    static float c_b5 = 0.f;
-    /* System generated locals */
-    int i__1;
-    float r__1;
-    /* Local variables */
-    static int lexp;
-    static float oldy;
-    static int uexp, i;
-    static float y, z;
-    static int nbits;
-    extern double slamc3_(float *, float *);
-    static float recbas;
-    static int exbits, expsum, try__;
-    lexp = 1;
-    exbits = 1;
-    try__ = lexp << 1;
-    if (try__ <= -(*emin)) {
-       lexp = try__;
-       ++exbits;
-       goto L10;
-    }
-    if (lexp == -(*emin)) {
-       uexp = lexp;
-    } else {
-       uexp = try__;
-       ++exbits;
-    }
-/*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater   
-       than or equal to EMIN. EXBITS is the number of bits needed to   
-       store the exponent. */
-    if (uexp + *emin > -lexp - *emin) {
-       expsum = lexp << 1;
-    } else {
-       expsum = uexp << 1;
-    }
-/*     EXPSUM is the exponent range, approximately equal to   
-       EMAX - EMIN + 1 . */
-    *emax = expsum + *emin - 1;
-    nbits = exbits + 1 + *p;
-/*     NBITS is the total number of bits needed to store a   
-       floating-point number. */
-    if (nbits % 2 == 1 && *beta == 2) {
-/*        Either there are an odd number of bits used to store a   
-          floating-point number, which is unlikely, or some bits are 
-          not used in the representation of numbers, which is possible
-          (e.g. Cray machines) or the mantissa has an implicit bit,   
-          (e.g. IEEE machines, Dec Vax machines), which is perhaps the
-          most likely. We have to assume the last alternative.   
-          If this is true, then we need to reduce EMAX by one because 
-          there must be some way of representing zero in an implicit-b
-          system. On machines like Cray, we are reducing EMAX by one 
-          unnecessarily. */
-       --(*emax);
-    }
-    if (*ieee) {
-/*        Assume we are on an IEEE machine which reserves one exponent
-          for infinity and NaN. */
-       --(*emax);
-    }
-/*     Now create RMAX, the largest machine number, which should   
-       be equal to (1.0 - BETA**(-P)) * BETA**EMAX .   
-       First compute 1.0 - BETA**(-P), being careful that the   
-       result is less than 1.0 . */
-    recbas = 1.f / *beta;
-    z = *beta - 1.f;
-    y = 0.f;
-    i__1 = *p;
-    for (i = 1; i <= *p; ++i) {
-       z *= recbas;
-       if (y < 1.f) {
-           oldy = y;
-       }
-       y = slamc3_(&y, &z);
-/* L20: */
-    }
-    if (y >= 1.f) {
-       y = oldy;
-    }
-/*     Now multiply by BETA**EMAX to get RMAX. */
-    i__1 = *emax;
-    for (i = 1; i <= *emax; ++i) {
-       r__1 = y * *beta;
-       y = slamc3_(&r__1, &c_b5);
-/* L30: */
-    }
-    *rmax = y;
-    return 0;
-/*     End of SLAMC5 */
-} /* slamc5_ */
-double pow_ri(float *ap, int *bp)
-double pow, x;
-int n;
-pow = 1;
-x = *ap;
-n = *bp;
-if(n != 0)
-       {
-       if(n < 0)
-               {
-               n = -n;
-               x = 1/x;
-               }
-       for( ; ; )
-               {
-               if(n & 01)
-                       pow *= x;
-               if(n >>= 1)
-                       x *= x;
-               else
-                       break;
-               }
-       }
diff --git a/superlu/slangs.c b/superlu/slangs.c
deleted file mode 100644
index f82368fc..00000000
--- a/superlu/slangs.c
+++ /dev/null
@@ -1,131 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  slangs.c
- * History:     Modified from lapack routine SLANGE
- */
-#include <math.h>
-#include "slu_sdefs.h"
-float slangs(char *norm, SuperMatrix *A)
-    Purpose   
-    =======   
-    SLANGS returns the value of the one norm, or the Frobenius norm, or 
-    the infinity norm, or the element of largest absolute value of a 
-    real matrix A.   
-    Description   
-    ===========   
-    SLANGE returns the value   
-       SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
-                (   
-                ( norm1(A),         NORM = '1', 'O' or 'o'   
-                (   
-                ( normI(A),         NORM = 'I' or 'i'   
-                (   
-                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
-    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
-    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
-    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
-    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
-    Arguments   
-    =========   
-    NORM    (input) CHARACTER*1   
-            Specifies the value to be returned in SLANGE as described above.   
-    A       (input) SuperMatrix*
-            The M by N sparse matrix A. 
-   ===================================================================== 
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    int      i, j, irow;
-    float   value, sum;
-    float   *rwork;
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
-       value = 0.;
-    } else if (lsame_(norm, "M")) {
-       /* Find max(abs(A(i,j))). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
-               value = SUPERLU_MAX( value, fabs( Aval[i]) );
-    } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
-       /* Find norm1(A). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j) {
-           sum = 0.;
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
-               sum += fabs(Aval[i]);
-           value = SUPERLU_MAX(value,sum);
-       }
-    } else if (lsame_(norm, "I")) {
-       /* Find normI(A). */
-       if ( !(rwork = (float *) SUPERLU_MALLOC(A->nrow * sizeof(float))) )
-           ABORT("SUPERLU_MALLOC fails for rwork.");
-       for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
-               irow = Astore->rowind[i];
-               rwork[irow] += fabs(Aval[i]);
-           }
-       value = 0.;
-       for (i = 0; i < A->nrow; ++i)
-           value = SUPERLU_MAX(value, rwork[i]);
-       SUPERLU_FREE (rwork);
-    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
-       /* Find normF(A). */
-       ABORT("Not implemented.");
-    } else
-       ABORT("Illegal norm specified.");
-    return (value);
-} /* slangs */
diff --git a/superlu/slaqgs.c b/superlu/slaqgs.c
deleted file mode 100644
index 083d6652..00000000
--- a/superlu/slaqgs.c
+++ /dev/null
@@ -1,157 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  slaqgs.c
- * History:     Modified from LAPACK routine SLAQGE
- */
-#include <math.h>
-#include "slu_sdefs.h"
-slaqgs(SuperMatrix *A, float *r, float *c, 
-       float rowcnd, float colcnd, float amax, char *equed)
-    Purpose   
-    =======   
-    SLAQGS equilibrates a general sparse M by N matrix A using the row and   
-    scaling factors in the vectors R and C.   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    A       (input/output) SuperMatrix*
-            On exit, the equilibrated matrix.  See EQUED for the form of 
-            the equilibrated matrix. The type of A can be:
-           Stype = NC; Dtype = SLU_S; Mtype = GE.
-    R       (input) float*, dimension (A->nrow)
-            The row scale factors for A.
-    C       (input) float*, dimension (A->ncol)
-            The column scale factors for A.
-    ROWCND  (input) float
-            Ratio of the smallest R(i) to the largest R(i).
-    COLCND  (input) float
-            Ratio of the smallest C(i) to the largest C(i).
-    AMAX    (input) float
-            Absolute value of largest matrix entry.
-    EQUED   (output) char*
-            Specifies the form of equilibration that was done.   
-            = 'N':  No equilibration   
-            = 'R':  Row equilibration, i.e., A has been premultiplied by  
-                    diag(R).   
-            = 'C':  Column equilibration, i.e., A has been postmultiplied  
-                    by diag(C).   
-            = 'B':  Both row and column equilibration, i.e., A has been
-                    replaced by diag(R) * A * diag(C).   
-    Internal Parameters   
-    ===================   
-    THRESH is a threshold value used to decide if row or column scaling   
-    should be done based on the ratio of the row or column scaling   
-    factors.  If ROWCND < THRESH, row scaling is done, and if   
-    COLCND < THRESH, column scaling is done.   
-    LARGE and SMALL are threshold values used to decide if row scaling   
-    should be done based on the absolute size of the largest matrix   
-    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
-    ===================================================================== 
-#define THRESH    (0.1)
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    int i, j, irow;
-    float large, small, cj;
-    extern double slamch_(char *);
-    /* Quick return if possible */
-    if (A->nrow <= 0 || A->ncol <= 0) {
-       *(unsigned char *)equed = 'N';
-       return;
-    }
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Initialize LARGE and SMALL. */
-    small = slamch_("Safe minimum") / slamch_("Precision");
-    large = 1. / small;
-    if (rowcnd >= THRESH && amax >= small && amax <= large) {
-       if (colcnd >= THRESH)
-           *(unsigned char *)equed = 'N';
-       else {
-           /* Column scaling */
-           for (j = 0; j < A->ncol; ++j) {
-               cj = c[j];
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   Aval[i] *= cj;
-                }
-           }
-           *(unsigned char *)equed = 'C';
-       }
-    } else if (colcnd >= THRESH) {
-       /* Row scaling, no column scaling */
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               Aval[i] *= r[irow];
-           }
-       *(unsigned char *)equed = 'R';
-    } else {
-       /* Row and column scaling */
-       for (j = 0; j < A->ncol; ++j) {
-           cj = c[j];
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               Aval[i] *= cj * r[irow];
-           }
-       }
-       *(unsigned char *)equed = 'B';
-    }
-    return;
-} /* slaqgs */
diff --git a/superlu/slu_Cnames.h b/superlu/slu_Cnames.h
deleted file mode 100644
index 7c8e7dd5..00000000
--- a/superlu/slu_Cnames.h
+++ /dev/null
@@ -1,356 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 1, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */
- * These macros define how C routines will be called.  ADD_ assumes that
- * they will be called by fortran, which expects C routines to have an
- * underscore postfixed to the name (Suns, and the Intel expect this).
- * NOCHANGE indicates that fortran will be calling, and that it expects
- * the name called by fortran to be identical to that compiled by the C
- * (RS6K's do this).  UPCASE says it expects C routines called by fortran
- * to be in all upcase (CRAY wants this). 
- */
-#define ADD_       0
-#define ADD__      1
-#define NOCHANGE   2
-#define UPCASE     3
-#define C_CALL     4
-#ifdef UpCase
-#define F77_CALL_C UPCASE
-#ifdef NoChange
-#define F77_CALL_C NOCHANGE
-#ifdef Add_
-#define F77_CALL_C ADD_
-#ifdef Add__
-#define F77_CALL_C ADD__
-/* Default */
-#ifndef F77_CALL_C
-#define F77_CALL_C ADD_
-#if (F77_CALL_C == ADD_)
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine
- * No redefinition necessary to have following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void dgemm_(...)
- *
- * This is the default.
- */
-#if (F77_CALL_C == ADD__)
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine 
- * for following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void dgemm__(...)
- */
-/* BLAS */
-#define sasum_    sasum__
-#define isamax_   isamax__
-#define scopy_    scopy__
-#define sscal_    sscal__
-#define sger_     sger__
-#define snrm2_    snrm2__
-#define ssymv_    ssymv__
-#define sdot_     sdot__
-#define saxpy_    saxpy__
-#define ssyr2_    ssyr2__
-#define srot_     srot__
-#define sgemv_    sgemv__
-#define strsv_    strsv__
-#define sgemm_    sgemm__
-#define strsm_    strsm__
-#define dasum_    dasum__
-#define idamax_   idamax__
-#define dcopy_    dcopy__
-#define dscal_    dscal__
-#define dger_     dger__
-#define dnrm2_    dnrm2__
-#define dsymv_    dsymv__
-#define ddot_     ddot__
-#define daxpy_    daxpy__
-#define dsyr2_    dsyr2__
-#define drot_     drot__
-#define dgemv_    dgemv__
-#define dtrsv_    dtrsv__
-#define dgemm_    dgemm__
-#define dtrsm_    dtrsm__
-#define scasum_   scasum__
-#define icamax_   icamax__
-#define ccopy_    ccopy__
-#define cscal_    cscal__
-#define scnrm2_   scnrm2__
-#define caxpy_    caxpy__
-#define cgemv_    cgemv__
-#define ctrsv_    ctrsv__
-#define cgemm_    cgemm__
-#define ctrsm_    ctrsm__
-#define cgerc_    cgerc__
-#define chemv_    chemv__
-#define cher2_    cher2__
-#define dzasum_   dzasum__
-#define izamax_   izamax__
-#define zcopy_    zcopy__
-#define zscal_    zscal__
-#define dznrm2_   dznrm2__
-#define zaxpy_    zaxpy__
-#define zgemv_    zgemv__
-#define ztrsv_    ztrsv__
-#define zgemm_    zgemm__
-#define ztrsm_    ztrsm__
-#define zgerc_    zgerc__
-#define zhemv_    zhemv__
-#define zher2_    zher2__
-/* LAPACK */
-#define dlamch_   dlamch__
-#define slamch_   slamch__
-#define xerbla_   xerbla__
-#define lsame_    lsame__
-#define dlacon_   dlacon__
-#define slacon_   slacon__
-#define icmax1_   icmax1__
-#define scsum1_   scsum1__
-#define clacon_   clacon__
-#define dzsum1_   dzsum1__
-#define izmax1_   izmax1__
-#define zlacon_   zlacon__
-/* Fortran interface */
-#define c_bridge_dgssv_ c_bridge_dgssv__
-#define c_fortran_sgssv_ c_fortran_sgssv__
-#define c_fortran_dgssv_ c_fortran_dgssv__
-#define c_fortran_cgssv_ c_fortran_cgssv__
-#define c_fortran_zgssv_ c_fortran_zgssv__
-#if (F77_CALL_C == UPCASE)
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine 
- * following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void DGEMM(...)
- */
-/* BLAS */
-#define sasum_    SASUM
-#define isamax_   ISAMAX
-#define scopy_    SCOPY
-#define sscal_    SSCAL
-#define sger_     SGER
-#define snrm2_    SNRM2
-#define ssymv_    SSYMV
-#define sdot_     SDOT
-#define saxpy_    SAXPY
-#define ssyr2_    SSYR2
-#define srot_     SROT
-#define sgemv_    SGEMV
-#define strsv_    STRSV
-#define sgemm_    SGEMM
-#define strsm_    STRSM
-#define dasum_    SASUM
-#define idamax_   ISAMAX
-#define dcopy_    SCOPY
-#define dscal_    SSCAL
-#define dger_     SGER
-#define dnrm2_    SNRM2
-#define dsymv_    SSYMV
-#define ddot_     SDOT
-#define daxpy_    SAXPY
-#define dsyr2_    SSYR2
-#define drot_     SROT
-#define dgemv_    SGEMV
-#define dtrsv_    STRSV
-#define dgemm_    SGEMM
-#define dtrsm_    STRSM
-#define scasum_   SCASUM
-#define icamax_   ICAMAX
-#define ccopy_    CCOPY
-#define cscal_    CSCAL
-#define scnrm2_   SCNRM2
-#define caxpy_    CAXPY
-#define cgemv_    CGEMV
-#define ctrsv_    CTRSV
-#define cgemm_    CGEMM
-#define ctrsm_    CTRSM
-#define cgerc_    CGERC
-#define chemv_    CHEMV
-#define cher2_    CHER2
-#define dzasum_   SCASUM
-#define izamax_   ICAMAX
-#define zcopy_    CCOPY
-#define zscal_    CSCAL
-#define dznrm2_   SCNRM2
-#define zaxpy_    CAXPY
-#define zgemv_    CGEMV
-#define ztrsv_    CTRSV
-#define zgemm_    CGEMM
-#define ztrsm_    CTRSM
-#define zgerc_    CGERC
-#define zhemv_    CHEMV
-#define zher2_    CHER2
-/* LAPACK */
-#define dlamch_   DLAMCH
-#define slamch_   SLAMCH
-#define xerbla_   XERBLA
-#define lsame_    LSAME
-#define dlacon_   DLACON
-#define slacon_   SLACON
-#define icmax1_   ICMAX1
-#define scsum1_   SCSUM1
-#define clacon_   CLACON
-#define dzsum1_   DZSUM1
-#define izmax1_   IZMAX1
-#define zlacon_   ZLACON
-/* Fortran interface */
-#define c_bridge_dgssv_ C_BRIDGE_DGSSV
-#define c_fortran_sgssv_ C_FORTRAN_SGSSV
-#define c_fortran_dgssv_ C_FORTRAN_DGSSV
-#define c_fortran_cgssv_ C_FORTRAN_CGSSV
-#define c_fortran_zgssv_ C_FORTRAN_ZGSSV
-#if (F77_CALL_C == NOCHANGE)
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine 
- * for following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void dgemm(...)
- */
-/* BLAS */
-#define sasum_    sasum
-#define isamax_   isamax
-#define scopy_    scopy
-#define sscal_    sscal
-#define sger_     sger
-#define snrm2_    snrm2
-#define ssymv_    ssymv
-#define sdot_     sdot
-#define saxpy_    saxpy
-#define ssyr2_    ssyr2
-#define srot_     srot
-#define sgemv_    sgemv
-#define strsv_    strsv
-#define sgemm_    sgemm
-#define strsm_    strsm
-#define dasum_    dasum
-#define idamax_   idamax
-#define dcopy_    dcopy
-#define dscal_    dscal
-#define dger_     dger
-#define dnrm2_    dnrm2
-#define dsymv_    dsymv
-#define ddot_     ddot
-#define daxpy_    daxpy
-#define dsyr2_    dsyr2
-#define drot_     drot
-#define dgemv_    dgemv
-#define dtrsv_    dtrsv
-#define dgemm_    dgemm
-#define dtrsm_    dtrsm
-#define scasum_   scasum
-#define icamax_   icamax
-#define ccopy_    ccopy
-#define cscal_    cscal
-#define scnrm2_   scnrm2
-#define caxpy_    caxpy
-#define cgemv_    cgemv
-#define ctrsv_    ctrsv
-#define cgemm_    cgemm
-#define ctrsm_    ctrsm
-#define cgerc_    cgerc
-#define chemv_    chemv
-#define cher2_    cher2
-#define dzasum_   dzasum
-#define izamax_   izamax
-#define zcopy_    zcopy
-#define zscal_    zscal
-#define dznrm2_   dznrm2
-#define zaxpy_    zaxpy
-#define zgemv_    zgemv
-#define ztrsv_    ztrsv
-#define zgemm_    zgemm
-#define ztrsm_    ztrsm
-#define zgerc_    zgerc
-#define zhemv_    zhemv
-#define zher2_    zher2
-/* LAPACK */
-#define dlamch_   dlamch
-#define slamch_   slamch
-#define xerbla_   xerbla
-#define lsame_    lsame
-#define dlacon_   dlacon
-#define slacon_   slacon
-#define icmax1_   icmax1
-#define scsum1_   scsum1
-#define clacon_   clacon
-#define dzsum1_   dzsum1
-#define izmax1_   izmax1
-#define zlacon_   zlacon
-/* Fortran interface */
-#define c_bridge_dgssv_ c_bridge_dgssv
-#define c_fortran_sgssv_ c_fortran_sgssv
-#define c_fortran_dgssv_ c_fortran_dgssv
-#define c_fortran_cgssv_ c_fortran_cgssv
-#define c_fortran_zgssv_ c_fortran_zgssv
-#endif /* __SUPERLU_CNAMES */
diff --git a/superlu/slu_cdefs.h b/superlu/slu_cdefs.h
deleted file mode 100644
index ade6a025..00000000
--- a/superlu/slu_cdefs.h
+++ /dev/null
@@ -1,246 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#ifndef __SUPERLU_cSP_DEFS /* allow multiple inclusions */
-#define __SUPERLU_cSP_DEFS
- * File name:          csp_defs.h
- * Purpose:             Sparse matrix types and function prototypes
- * History:
- */
-#ifdef _CRAY
-#include <fortran.h>
-#include <string.h>
-/* Define my integer type int_t */
-typedef int int_t; /* default */
-#include "slu_Cnames.h"
-#include "supermatrix.h"
-#include "slu_util.h"
-#include "slu_scomplex.h"
- * Global data structures used in LU factorization -
- * 
- *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
- *   (xsup,supno): supno[i] is the supernode no to which i belongs;
- *     xsup(s) points to the beginning of the s-th supernode.
- *     e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
- *             xsup 0 1 2 4 7 12
- *     Note: dfs will be performed on supernode rep. relative to the new 
- *           row pivoting ordering
- *
- *   (xlsub,lsub): lsub[*] contains the compressed subscript of
- *     rectangular supernodes; xlsub[j] points to the starting
- *     location of the j-th column in lsub[*]. Note that xlsub 
- *     is indexed by column.
- *     Storage: original row subscripts
- *
- *      During the course of sparse LU factorization, we also use
- *     (xlsub,lsub) for the purpose of symmetric pruning. For each
- *     supernode {s,s+1,...,t=s+r} with first column s and last
- *     column t, the subscript set
- *             lsub[j], j=xlsub[s], .., xlsub[s+1]-1
- *     is the structure of column s (i.e. structure of this supernode).
- *     It is used for the storage of numerical values.
- *     Furthermore,
- *             lsub[j], j=xlsub[t], .., xlsub[t+1]-1
- *     is the structure of the last column t of this supernode.
- *     It is for the purpose of symmetric pruning. Therefore, the
- *     structural subscripts can be rearranged without making physical
- *     interchanges among the numerical values.
- *
- *     However, if the supernode has only one column, then we
- *     only keep one set of subscripts. For any subscript interchange
- *     performed, similar interchange must be done on the numerical
- *     values.
- *
- *     The last column structures (for pruning) will be removed
- *     after the numercial LU factorization phase.
- *
- *   (xlusup,lusup): lusup[*] contains the numerical values of the
- *     rectangular supernodes; xlusup[j] points to the starting
- *     location of the j-th column in storage vector lusup[*]
- *     Note: xlusup is indexed by column.
- *     Each rectangular supernode is stored by column-major
- *     scheme, consistent with Fortran 2-dim array storage.
- *
- *   (xusub,ucol,usub): ucol[*] stores the numerical values of
- *     U-columns outside the rectangular supernodes. The row
- *     subscript of nonzero ucol[k] is stored in usub[k].
- *     xusub[i] points to the starting location of column i in ucol.
- *     Storage: new row subscripts; that is subscripts of PA.
- */
-typedef struct {
-    int     *xsup;    /* supernode and column mapping */
-    int     *supno;   
-    int     *lsub;    /* compressed L subscripts */
-    int            *xlsub;
-    complex  *lusup;   /* L supernodes */
-    int     *xlusup;
-    complex  *ucol;    /* U columns */
-    int     *usub;
-    int            *xusub;
-    int     nzlmax;   /* current max size of lsub */
-    int     nzumax;   /*    "    "    "      ucol */
-    int     nzlumax;  /*    "    "    "     lusup */
-    int     n;        /* number of columns in the matrix */
-    LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
-} GlobalLU_t;
-typedef struct {
-    float for_lu;
-    float total_needed;
-    int   expansions;
-} mem_usage_t;
-#ifdef __cplusplus
-extern "C" {
-/* Driver routines */
-extern void
-cgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
-      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
-extern void
-cgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
-       char *, float *, float *, SuperMatrix *, SuperMatrix *,
-       void *, int, SuperMatrix *, SuperMatrix *,
-       float *, float *, float *, float *,
-       mem_usage_t *, SuperLUStat_t *, int *);
-/* Supernodal LU factor related */
-extern void
-cCreate_CompCol_Matrix(SuperMatrix *, int, int, int, complex *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-cCreate_CompRow_Matrix(SuperMatrix *, int, int, int, complex *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-cCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
-extern void
-cCreate_Dense_Matrix(SuperMatrix *, int, int, complex *, int,
-                    Stype_t, Dtype_t, Mtype_t);
-extern void
-cCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, complex *, 
-                        int *, int *, int *, int *, int *,
-                        Stype_t, Dtype_t, Mtype_t);
-extern void
-cCopy_Dense_Matrix(int, int, complex *, int, complex *, int);
-  // extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
-  // extern void    fixupL (const int, const int *, GlobalLU_t *);
-extern void    callocateA (int, int, complex **, int **, int **);
-extern void    cgstrf (superlu_options_t*, SuperMatrix*, float, 
-                       int, int, int*, void *, int, int *, int *, 
-                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
-extern int     csnode_dfs (const int, const int, const int *, const int *,
-                            const int *, int *, int *, GlobalLU_t *);
-extern int     csnode_bmod (const int, const int, const int, complex *,
-                              complex *, GlobalLU_t *, SuperLUStat_t*);
-extern void    cpanel_dfs (const int, const int, const int, SuperMatrix *,
-                          int *, int *, complex *, int *, int *, int *,
-                          int *, int *, int *, int *, GlobalLU_t *);
-extern void    cpanel_bmod (const int, const int, const int, const int,
-                           complex *, complex *, int *, int *,
-                          GlobalLU_t *, SuperLUStat_t*);
-extern int     ccolumn_dfs (const int, const int, int *, int *, int *, int *,
-                          int *, int *, int *, int *, int *, GlobalLU_t *);
-extern int     ccolumn_bmod (const int, const int, complex *,
-                          complex *, int *, int *, int,
-                           GlobalLU_t *, SuperLUStat_t*);
-extern int     ccopy_to_ucol (int, int, int *, int *, int *,
-                              complex *, GlobalLU_t *);         
-extern int     cpivotL (const int, const float, int *, int *, 
-                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
-extern void    cpruneL (const int, const int *, const int, const int,
-                         const int *, const int *, int *, GlobalLU_t *);
-extern void    creadmt (int *, int *, int *, complex **, int **, int **);
-extern void    cGenXtrue (int, int, complex *, int);
-extern void    cFillRHS (trans_t, int, complex *, int, SuperMatrix *,
-                         SuperMatrix *);
-extern void    cgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
-                        SuperMatrix *, SuperLUStat_t*, int *);
-/* Driver related */
-extern void    cgsequ (SuperMatrix *, float *, float *, float *,
-                       float *, float *, int *);
-extern void    claqgs (SuperMatrix *, float *, float *, float,
-                        float, float, char *);
-extern void    cgscon (char *, SuperMatrix *, SuperMatrix *, 
-                        float, float *, SuperLUStat_t*, int *);
-extern float   cPivotGrowth(int, SuperMatrix *, int *, 
-                            SuperMatrix *, SuperMatrix *);
-extern void    cgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
-                       SuperMatrix *, int *, int *, char *, float *, 
-                       float *, SuperMatrix *, SuperMatrix *,
-                       float *, float *, SuperLUStat_t*, int *);
-extern int     sp_ctrsv (char *, char *, char *, SuperMatrix *,
-                       SuperMatrix *, complex *, SuperLUStat_t*, int *);
-extern int     sp_cgemv (char *, complex, SuperMatrix *, complex *,
-                       int, complex, complex *, int);
-extern int     sp_cgemm (char *, char *, int, int, int, complex,
-                       SuperMatrix *, complex *, int, complex, 
-                       complex *, int);
-/* Memory-related */
-extern int     cLUMemInit (fact_t, void *, int, int, int, int, int,
-                            SuperMatrix *, SuperMatrix *,
-                            GlobalLU_t *, int **, complex **);
-extern void    cSetRWork (int, int, complex *, complex **, complex **);
-extern void    cLUWorkFree (int *, complex *, GlobalLU_t *);
-extern int     cLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
-extern complex  *complexMalloc(int);
-extern complex  *complexCalloc(int);
-extern float  *floatMalloc(int);
-extern float  *floatCalloc(int);
-extern int     cmemory_usage(const int, const int, const int, const int);
-extern int     cQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
-/* Auxiliary routines */
-extern void    creadhb(int *, int *, int *, complex **, int **, int **);
-extern void    cCompRow_to_CompCol(int, int, int, complex*, int*, int*,
-                                  complex **, int **, int **);
-extern void    cfill (complex *, int, complex);
-extern void    cinf_norm_error (int, SuperMatrix *, complex *);
-  // extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
-  //                    complex, complex, complex *, complex *, char *);
-/* Routines for debugging */
-extern void    cPrint_CompCol_Matrix(char *, SuperMatrix *);
-extern void    cPrint_SuperNode_Matrix(char *, SuperMatrix *);
-extern void    cPrint_Dense_Matrix(char *, SuperMatrix *);
-// extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
-// extern void    check_tempv(int, complex *);
-#ifdef __cplusplus
-  }
-#endif /* __SUPERLU_cSP_DEFS */
diff --git a/superlu/slu_dcomplex.h b/superlu/slu_dcomplex.h
deleted file mode 100644
index 68dc1ce6..00000000
--- a/superlu/slu_dcomplex.h
+++ /dev/null
@@ -1,93 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#ifndef __SUPERLU_DCOMPLEX /* allow multiple inclusions */
- * This header file is to be included in source files z*.c
- */
-typedef struct { double r, i; } doublecomplex;
-/* Macro definitions */
-/* Complex Addition c = a + b */
-#define z_add(c, a, b) { (c)->r = (a)->r + (b)->r; \
-                        (c)->i = (a)->i + (b)->i; }
-/* Complex Subtraction c = a - b */
-#define z_sub(c, a, b) { (c)->r = (a)->r - (b)->r; \
-                        (c)->i = (a)->i - (b)->i; }
-/* Complex-Double Multiplication */
-#define zd_mult(c, a, b) { (c)->r = (a)->r * (b); \
-                           (c)->i = (a)->i * (b); }
-/* Complex-Complex Multiplication */
-#define zz_mult(c, a, b) { \
-       double cr, ci; \
-       cr = (a)->r * (b)->r - (a)->i * (b)->i; \
-       ci = (a)->i * (b)->r + (a)->r * (b)->i; \
-       (c)->r = cr; \
-       (c)->i = ci; \
-    }
-#define zz_conj(a, b) { \
-        (a)->r = (b)->r; \
-        (a)->i = -((b)->i); \
-    }
-/* Complex equality testing */
-#define z_eq(a, b)  ( (a)->r == (b)->r && (a)->i == (b)->i )
-#ifdef __cplusplus
-extern "C" {
-/* Prototypes for functions in dcomplex.c */
-void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
-double z_abs(doublecomplex *);     /* exact */
-double z_abs1(doublecomplex *);    /* approximate */
-void z_exp(doublecomplex *, doublecomplex *);
-void d_cnjg(doublecomplex *r, doublecomplex *z);
-double d_imag(doublecomplex *);
-#ifdef __cplusplus
-  }
-#endif  /* __SUPERLU_DCOMPLEX */
diff --git a/superlu/slu_ddefs.h b/superlu/slu_ddefs.h
deleted file mode 100644
index de3f7e84..00000000
--- a/superlu/slu_ddefs.h
+++ /dev/null
@@ -1,243 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#ifndef __SUPERLU_dSP_DEFS /* allow multiple inclusions */
-#define __SUPERLU_dSP_DEFS
- * File name:          dsp_defs.h
- * Purpose:             Sparse matrix types and function prototypes
- * History:
- */
-#ifdef _CRAY
-#include <fortran.h>
-#include <string.h>
-/* Define my integer type int_t */
-typedef int int_t; /* default */
-#include "slu_Cnames.h"
-#include "supermatrix.h"
-#include "slu_util.h"
- * Global data structures used in LU factorization -
- * 
- *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
- *   (xsup,supno): supno[i] is the supernode no to which i belongs;
- *     xsup(s) points to the beginning of the s-th supernode.
- *     e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
- *             xsup 0 1 2 4 7 12
- *     Note: dfs will be performed on supernode rep. relative to the new 
- *           row pivoting ordering
- *
- *   (xlsub,lsub): lsub[*] contains the compressed subscript of
- *     rectangular supernodes; xlsub[j] points to the starting
- *     location of the j-th column in lsub[*]. Note that xlsub 
- *     is indexed by column.
- *     Storage: original row subscripts
- *
- *      During the course of sparse LU factorization, we also use
- *     (xlsub,lsub) for the purpose of symmetric pruning. For each
- *     supernode {s,s+1,...,t=s+r} with first column s and last
- *     column t, the subscript set
- *             lsub[j], j=xlsub[s], .., xlsub[s+1]-1
- *     is the structure of column s (i.e. structure of this supernode).
- *     It is used for the storage of numerical values.
- *     Furthermore,
- *             lsub[j], j=xlsub[t], .., xlsub[t+1]-1
- *     is the structure of the last column t of this supernode.
- *     It is for the purpose of symmetric pruning. Therefore, the
- *     structural subscripts can be rearranged without making physical
- *     interchanges among the numerical values.
- *
- *     However, if the supernode has only one column, then we
- *     only keep one set of subscripts. For any subscript interchange
- *     performed, similar interchange must be done on the numerical
- *     values.
- *
- *     The last column structures (for pruning) will be removed
- *     after the numercial LU factorization phase.
- *
- *   (xlusup,lusup): lusup[*] contains the numerical values of the
- *     rectangular supernodes; xlusup[j] points to the starting
- *     location of the j-th column in storage vector lusup[*]
- *     Note: xlusup is indexed by column.
- *     Each rectangular supernode is stored by column-major
- *     scheme, consistent with Fortran 2-dim array storage.
- *
- *   (xusub,ucol,usub): ucol[*] stores the numerical values of
- *     U-columns outside the rectangular supernodes. The row
- *     subscript of nonzero ucol[k] is stored in usub[k].
- *     xusub[i] points to the starting location of column i in ucol.
- *     Storage: new row subscripts; that is subscripts of PA.
- */
-typedef struct {
-    int     *xsup;    /* supernode and column mapping */
-    int     *supno;   
-    int     *lsub;    /* compressed L subscripts */
-    int            *xlsub;
-    double  *lusup;   /* L supernodes */
-    int     *xlusup;
-    double  *ucol;    /* U columns */
-    int     *usub;
-    int            *xusub;
-    int     nzlmax;   /* current max size of lsub */
-    int     nzumax;   /*    "    "    "      ucol */
-    int     nzlumax;  /*    "    "    "     lusup */
-    int     n;        /* number of columns in the matrix */
-    LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
-} GlobalLU_t;
-typedef struct {
-    float for_lu;
-    float total_needed;
-    int   expansions;
-} mem_usage_t;
-#ifdef __cplusplus
-extern "C" {
-/* Driver routines */
-extern void
-dgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
-      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
-extern void
-dgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
-       char *, double *, double *, SuperMatrix *, SuperMatrix *,
-       void *, int, SuperMatrix *, SuperMatrix *,
-       double *, double *, double *, double *,
-       mem_usage_t *, SuperLUStat_t *, int *);
-/* Supernodal LU factor related */
-extern void
-dCreate_CompCol_Matrix(SuperMatrix *, int, int, int, double *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-dCreate_CompRow_Matrix(SuperMatrix *, int, int, int, double *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-dCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
-extern void
-dCreate_Dense_Matrix(SuperMatrix *, int, int, double *, int,
-                    Stype_t, Dtype_t, Mtype_t);
-extern void
-dCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, double *, 
-                        int *, int *, int *, int *, int *,
-                        Stype_t, Dtype_t, Mtype_t);
-extern void
-dCopy_Dense_Matrix(int, int, double *, int, double *, int);
-extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
-extern void    fixupL (const int, const int *, GlobalLU_t *);
-extern void    dallocateA (int, int, double **, int **, int **);
-extern void    dgstrf (superlu_options_t*, SuperMatrix*, double, 
-                       int, int, int*, void *, int, int *, int *, 
-                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
-extern int     dsnode_dfs (const int, const int, const int *, const int *,
-                            const int *, int *, int *, GlobalLU_t *);
-extern int     dsnode_bmod (const int, const int, const int, double *,
-                              double *, GlobalLU_t *, SuperLUStat_t*);
-extern void    dpanel_dfs (const int, const int, const int, SuperMatrix *,
-                          int *, int *, double *, int *, int *, int *,
-                          int *, int *, int *, int *, GlobalLU_t *);
-extern void    dpanel_bmod (const int, const int, const int, const int,
-                           double *, double *, int *, int *,
-                          GlobalLU_t *, SuperLUStat_t*);
-extern int     dcolumn_dfs (const int, const int, int *, int *, int *, int *,
-                          int *, int *, int *, int *, int *, GlobalLU_t *);
-extern int     dcolumn_bmod (const int, const int, double *,
-                          double *, int *, int *, int,
-                           GlobalLU_t *, SuperLUStat_t*);
-extern int     dcopy_to_ucol (int, int, int *, int *, int *,
-                              double *, GlobalLU_t *);         
-extern int     dpivotL (const int, const double, int *, int *, 
-                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
-extern void    dpruneL (const int, const int *, const int, const int,
-                         const int *, const int *, int *, GlobalLU_t *);
-extern void    dreadmt (int *, int *, int *, double **, int **, int **);
-extern void    dGenXtrue (int, int, double *, int);
-extern void    dFillRHS (trans_t, int, double *, int, SuperMatrix *,
-                         SuperMatrix *);
-extern void    dgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
-                        SuperMatrix *, SuperLUStat_t*, int *);
-/* Driver related */
-extern void    dgsequ (SuperMatrix *, double *, double *, double *,
-                       double *, double *, int *);
-extern void    dlaqgs (SuperMatrix *, double *, double *, double,
-                        double, double, char *);
-extern void    dgscon (char *, SuperMatrix *, SuperMatrix *, 
-                        double, double *, SuperLUStat_t*, int *);
-extern double   dPivotGrowth(int, SuperMatrix *, int *, 
-                            SuperMatrix *, SuperMatrix *);
-extern void    dgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
-                       SuperMatrix *, int *, int *, char *, double *, 
-                       double *, SuperMatrix *, SuperMatrix *,
-                       double *, double *, SuperLUStat_t*, int *);
-extern int     sp_dtrsv (char *, char *, char *, SuperMatrix *,
-                       SuperMatrix *, double *, SuperLUStat_t*, int *);
-extern int     sp_dgemv (char *, double, SuperMatrix *, double *,
-                       int, double, double *, int);
-extern int     sp_dgemm (char *, char *, int, int, int, double,
-                       SuperMatrix *, double *, int, double, 
-                       double *, int);
-/* Memory-related */
-extern int     dLUMemInit (fact_t, void *, int, int, int, int, int,
-                            SuperMatrix *, SuperMatrix *,
-                            GlobalLU_t *, int **, double **);
-extern void    dSetRWork (int, int, double *, double **, double **);
-extern void    dLUWorkFree (int *, double *, GlobalLU_t *);
-extern int     dLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
-extern double  *doubleMalloc(int);
-extern double  *doubleCalloc(int);
-extern int     dmemory_usage(const int, const int, const int, const int);
-extern int     dQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
-/* Auxiliary routines */
-extern void    dreadhb(int *, int *, int *, double **, int **, int **);
-extern void    dCompRow_to_CompCol(int, int, int, double*, int*, int*,
-                                  double **, int **, int **);
-extern void    dfill (double *, int, double);
-extern void    dinf_norm_error (int, SuperMatrix *, double *);
-extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
-                        double, double, double *, double *, char *);
-/* Routines for debugging */
-extern void    dPrint_CompCol_Matrix(char *, SuperMatrix *);
-extern void    dPrint_SuperNode_Matrix(char *, SuperMatrix *);
-extern void    dPrint_Dense_Matrix(char *, SuperMatrix *);
-extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
-extern void    check_tempv(int, double *);
-#ifdef __cplusplus
-  }
-#endif /* __SUPERLU_dSP_DEFS */
diff --git a/superlu/slu_scomplex.h b/superlu/slu_scomplex.h
deleted file mode 100644
index 4f653b15..00000000
--- a/superlu/slu_scomplex.h
+++ /dev/null
@@ -1,93 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#ifndef __SUPERLU_SCOMPLEX /* allow multiple inclusions */
- * This header file is to be included in source files c*.c
- */
-typedef struct { float r, i; } complex;
-/* Macro definitions */
-/* Complex Addition c = a + b */
-#define c_add(c, a, b) { (c)->r = (a)->r + (b)->r; \
-                        (c)->i = (a)->i + (b)->i; }
-/* Complex Subtraction c = a - b */
-#define c_sub(c, a, b) { (c)->r = (a)->r - (b)->r; \
-                        (c)->i = (a)->i - (b)->i; }
-/* Complex-Double Multiplication */
-#define cs_mult(c, a, b) { (c)->r = (a)->r * (b); \
-                           (c)->i = (a)->i * (b); }
-/* Complex-Complex Multiplication */
-#define cc_mult(c, a, b) { \
-       float cr, ci; \
-       cr = (a)->r * (b)->r - (a)->i * (b)->i; \
-       ci = (a)->i * (b)->r + (a)->r * (b)->i; \
-       (c)->r = cr; \
-       (c)->i = ci; \
-    }
-#define cc_conj(a, b) { \
-        (a)->r = (b)->r; \
-        (a)->i = -((b)->i); \
-    }
-/* Complex equality testing */
-#define c_eq(a, b)  ( (a)->r == (b)->r && (a)->i == (b)->i )
-#ifdef __cplusplus
-extern "C" {
-/* Prototypes for functions in scomplex.c */
-void c_div(complex *, complex *, complex *);
-double c_abs(complex *);     /* exact */
-double c_abs1(complex *);    /* approximate */
-void c_exp(complex *, complex *);
-void r_cnjg(complex *, complex *);
-double r_imag(complex *);
-#ifdef __cplusplus
-  }
-#endif  /* __SUPERLU_SCOMPLEX */
diff --git a/superlu/slu_sdefs.h b/superlu/slu_sdefs.h
deleted file mode 100644
index 5177d5cf..00000000
--- a/superlu/slu_sdefs.h
+++ /dev/null
@@ -1,243 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#ifndef __SUPERLU_sSP_DEFS /* allow multiple inclusions */
-#define __SUPERLU_sSP_DEFS
- * File name:          ssp_defs.h
- * Purpose:             Sparse matrix types and function prototypes
- * History:
- */
-#ifdef _CRAY
-#include <fortran.h>
-#include <string.h>
-/* Define my integer type int_t */
-typedef int int_t; /* default */
-#include "slu_Cnames.h"
-#include "supermatrix.h"
-#include "slu_util.h"
- * Global data structures used in LU factorization -
- * 
- *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
- *   (xsup,supno): supno[i] is the supernode no to which i belongs;
- *     xsup(s) points to the beginning of the s-th supernode.
- *     e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
- *             xsup 0 1 2 4 7 12
- *     Note: dfs will be performed on supernode rep. relative to the new 
- *           row pivoting ordering
- *
- *   (xlsub,lsub): lsub[*] contains the compressed subscript of
- *     rectangular supernodes; xlsub[j] points to the starting
- *     location of the j-th column in lsub[*]. Note that xlsub 
- *     is indexed by column.
- *     Storage: original row subscripts
- *
- *      During the course of sparse LU factorization, we also use
- *     (xlsub,lsub) for the purpose of symmetric pruning. For each
- *     supernode {s,s+1,...,t=s+r} with first column s and last
- *     column t, the subscript set
- *             lsub[j], j=xlsub[s], .., xlsub[s+1]-1
- *     is the structure of column s (i.e. structure of this supernode).
- *     It is used for the storage of numerical values.
- *     Furthermore,
- *             lsub[j], j=xlsub[t], .., xlsub[t+1]-1
- *     is the structure of the last column t of this supernode.
- *     It is for the purpose of symmetric pruning. Therefore, the
- *     structural subscripts can be rearranged without making physical
- *     interchanges among the numerical values.
- *
- *     However, if the supernode has only one column, then we
- *     only keep one set of subscripts. For any subscript interchange
- *     performed, similar interchange must be done on the numerical
- *     values.
- *
- *     The last column structures (for pruning) will be removed
- *     after the numercial LU factorization phase.
- *
- *   (xlusup,lusup): lusup[*] contains the numerical values of the
- *     rectangular supernodes; xlusup[j] points to the starting
- *     location of the j-th column in storage vector lusup[*]
- *     Note: xlusup is indexed by column.
- *     Each rectangular supernode is stored by column-major
- *     scheme, consistent with Fortran 2-dim array storage.
- *
- *   (xusub,ucol,usub): ucol[*] stores the numerical values of
- *     U-columns outside the rectangular supernodes. The row
- *     subscript of nonzero ucol[k] is stored in usub[k].
- *     xusub[i] points to the starting location of column i in ucol.
- *     Storage: new row subscripts; that is subscripts of PA.
- */
-typedef struct {
-    int     *xsup;    /* supernode and column mapping */
-    int     *supno;   
-    int     *lsub;    /* compressed L subscripts */
-    int            *xlsub;
-    float  *lusup;   /* L supernodes */
-    int     *xlusup;
-    float  *ucol;    /* U columns */
-    int     *usub;
-    int            *xusub;
-    int     nzlmax;   /* current max size of lsub */
-    int     nzumax;   /*    "    "    "      ucol */
-    int     nzlumax;  /*    "    "    "     lusup */
-    int     n;        /* number of columns in the matrix */
-    LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
-} GlobalLU_t;
-typedef struct {
-    float for_lu;
-    float total_needed;
-    int   expansions;
-} mem_usage_t;
-#ifdef __cplusplus
-extern "C" {
-/* Driver routines */
-extern void
-sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
-      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
-extern void
-sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
-       char *, float *, float *, SuperMatrix *, SuperMatrix *,
-       void *, int, SuperMatrix *, SuperMatrix *,
-       float *, float *, float *, float *,
-       mem_usage_t *, SuperLUStat_t *, int *);
-/* Supernodal LU factor related */
-extern void
-sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
-extern void
-sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int,
-                    Stype_t, Dtype_t, Mtype_t);
-extern void
-sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *, 
-                        int *, int *, int *, int *, int *,
-                        Stype_t, Dtype_t, Mtype_t);
-extern void
-sCopy_Dense_Matrix(int, int, float *, int, float *, int);
-  // extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
-  // extern void    fixupL (const int, const int *, GlobalLU_t *);
-extern void    sallocateA (int, int, float **, int **, int **);
-extern void    sgstrf (superlu_options_t*, SuperMatrix*, float, 
-                       int, int, int*, void *, int, int *, int *, 
-                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
-extern int     ssnode_dfs (const int, const int, const int *, const int *,
-                            const int *, int *, int *, GlobalLU_t *);
-extern int     ssnode_bmod (const int, const int, const int, float *,
-                              float *, GlobalLU_t *, SuperLUStat_t*);
-extern void    spanel_dfs (const int, const int, const int, SuperMatrix *,
-                          int *, int *, float *, int *, int *, int *,
-                          int *, int *, int *, int *, GlobalLU_t *);
-extern void    spanel_bmod (const int, const int, const int, const int,
-                           float *, float *, int *, int *,
-                          GlobalLU_t *, SuperLUStat_t*);
-extern int     scolumn_dfs (const int, const int, int *, int *, int *, int *,
-                          int *, int *, int *, int *, int *, GlobalLU_t *);
-extern int     scolumn_bmod (const int, const int, float *,
-                          float *, int *, int *, int,
-                           GlobalLU_t *, SuperLUStat_t*);
-extern int     scopy_to_ucol (int, int, int *, int *, int *,
-                              float *, GlobalLU_t *);         
-extern int     spivotL (const int, const float, int *, int *, 
-                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
-extern void    spruneL (const int, const int *, const int, const int,
-                         const int *, const int *, int *, GlobalLU_t *);
-extern void    sreadmt (int *, int *, int *, float **, int **, int **);
-extern void    sGenXtrue (int, int, float *, int);
-extern void    sFillRHS (trans_t, int, float *, int, SuperMatrix *,
-                         SuperMatrix *);
-extern void    sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
-                        SuperMatrix *, SuperLUStat_t*, int *);
-/* Driver related */
-extern void    sgsequ (SuperMatrix *, float *, float *, float *,
-                       float *, float *, int *);
-extern void    slaqgs (SuperMatrix *, float *, float *, float,
-                        float, float, char *);
-extern void    sgscon (char *, SuperMatrix *, SuperMatrix *, 
-                        float, float *, SuperLUStat_t*, int *);
-extern float   sPivotGrowth(int, SuperMatrix *, int *, 
-                            SuperMatrix *, SuperMatrix *);
-extern void    sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
-                       SuperMatrix *, int *, int *, char *, float *, 
-                       float *, SuperMatrix *, SuperMatrix *,
-                       float *, float *, SuperLUStat_t*, int *);
-extern int     sp_strsv (char *, char *, char *, SuperMatrix *,
-                       SuperMatrix *, float *, SuperLUStat_t*, int *);
-extern int     sp_sgemv (char *, float, SuperMatrix *, float *,
-                       int, float, float *, int);
-extern int     sp_sgemm (char *, char *, int, int, int, float,
-                       SuperMatrix *, float *, int, float, 
-                       float *, int);
-/* Memory-related */
-extern int     sLUMemInit (fact_t, void *, int, int, int, int, int,
-                            SuperMatrix *, SuperMatrix *,
-                            GlobalLU_t *, int **, float **);
-extern void    sSetRWork (int, int, float *, float **, float **);
-extern void    sLUWorkFree (int *, float *, GlobalLU_t *);
-extern int     sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
-extern float  *floatMalloc(int);
-extern float  *floatCalloc(int);
-extern int     smemory_usage(const int, const int, const int, const int);
-extern int     sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
-/* Auxiliary routines */
-extern void    sreadhb(int *, int *, int *, float **, int **, int **);
-extern void    sCompRow_to_CompCol(int, int, int, float*, int*, int*,
-                                  float **, int **, int **);
-extern void    sfill (float *, int, float);
-extern void    sinf_norm_error (int, SuperMatrix *, float *);
-  // extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
-  //                    float, float, float *, float *, char *);
-/* Routines for debugging */
-extern void    sPrint_CompCol_Matrix(char *, SuperMatrix *);
-extern void    sPrint_SuperNode_Matrix(char *, SuperMatrix *);
-extern void    sPrint_Dense_Matrix(char *, SuperMatrix *);
-// extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
-// extern void    check_tempv(int, float *);
-#ifdef __cplusplus
-  }
-#endif /* __SUPERLU_sSP_DEFS */
diff --git a/superlu/slu_util.h b/superlu/slu_util.h
deleted file mode 100644
index afed257a..00000000
--- a/superlu/slu_util.h
+++ /dev/null
@@ -1,287 +0,0 @@
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#ifndef __SUPERLU_UTIL /* allow multiple inclusions */
-#define __SUPERLU_UTIL
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef __STDC__
-#include <malloc.h>
-#include <assert.h>
- * Macros
- ***********************************************************************/
-#define FIRSTCOL_OF_SNODE(i)   (xsup[i])
-/* No of marker arrays used in the symbolic factorization,
-   each of size n */
-#define NO_MARKER     3
-#define NUM_TEMPV(m,w,t,b)  ( SUPERLU_MAX(m, (t + b)*w) )
-#ifndef USER_ABORT
-#define USER_ABORT(msg) superlu_abort_and_exit(msg)
-#define ABORT(err_msg) \
- { char msg[256];\
-   sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
-   USER_ABORT(msg); }
-#ifndef USER_MALLOC
-#if 1
-#define USER_MALLOC(size) superlu_malloc(size)
-/* The following may check out some uninitialized data */
-#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
-#define SUPERLU_MALLOC(size) USER_MALLOC(size)
-#ifndef USER_FREE
-#define USER_FREE(addr) superlu_free(addr)
-#define SUPERLU_FREE(addr) USER_FREE(addr)
-#define CHECK_MALLOC(where) {                 \
-    extern int superlu_malloc_total;        \
-    printf("%s: malloc_total %d Bytes\n",     \
-          where, superlu_malloc_total); \
-#define SUPERLU_MAX(x, y)      ( (x) > (y) ? (x) : (y) )
-#define SUPERLU_MIN(x, y)      ( (x) < (y) ? (x) : (y) )
- * Constants 
- ***********************************************************************/
-#define EMPTY  (-1)
-/*#define NO   (-1)*/
-#define FALSE  0
-#define TRUE   1
- * Enumerate types
- ***********************************************************************/
-typedef enum {NO, YES}                                          yes_no_t;
-typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t;
-typedef enum {NOROWPERM, LargeDiag, MY_PERMR}                   rowperm_t;
-typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC}colperm_t;
-typedef enum {NOTRANS, TRANS, CONJ}                             trans_t;
-typedef enum {NOEQUIL, ROW, COL, BOTH}                          DiagScale_t;
-typedef enum {NOREFINE, SINGLE=1, DOUBLE, EXTRA}                IterRefine_t;
-typedef enum {LUSUP, UCOL, LSUB, USUB}                          MemType;
-typedef enum {HEAD, TAIL}                                       stack_end_t;
-typedef enum {SYSTEM, USER}                                     LU_space_t;
- * The following enumerate type is used by the statistics variable 
- * to keep track of flop count and time spent at various stages.
- *
- * Note that not all of the fields are disjoint.
- */
-typedef enum {
-    COLPERM, /* find a column ordering that minimizes fills */
-    RELAX,   /* find artificial supernodes */
-    ETREE,   /* compute column etree */
-    EQUIL,   /* equilibrate the original matrix */
-    FACT,    /* perform LU factorization */
-    RCOND,   /* estimate reciprocal condition number */
-    SOLVE,   /* forward and back solves */
-    REFINE,  /* perform iterative refinement */
-    TRSV,    /* fraction of FACT spent in xTRSV */
-    GEMV,    /* fraction of FACT spent in xGEMV */
-    FERR,    /* estimate error bounds after iterative refinement */
-    NPHASES  /* total number of phases */
-} PhaseType;
- * Type definitions
- ***********************************************************************/
-typedef float    flops_t;
-typedef unsigned char Logical;
- *-- This contains the options used to control the solve process.
- *
- * Fact   (fact_t)
- *        Specifies whether or not the factored form of the matrix
- *        A is supplied on entry, and if not, how the matrix A should
- *        be factorizaed.
- *        = DOFACT: The matrix A will be factorized from scratch, and the
- *             factors will be stored in L and U.
- *        = SamePattern: The matrix A will be factorized assuming
- *             that a factorization of a matrix with the same sparsity
- *             pattern was performed prior to this one. Therefore, this
- *             factorization will reuse column permutation vector 
- *             ScalePermstruct->perm_c and the column elimination tree
- *             LUstruct->etree.
- *        = SamePattern_SameRowPerm: The matrix A will be factorized
- *             assuming that a factorization of a matrix with the same
- *             sparsity        pattern and similar numerical values was 
- *             prior to this one. Therefore, this factorization will reuse
- *             both row and column scaling factors R and C, both row and
- *             column permutation vectors perm_r and perm_c, and the
- *             data structure set up from the previous symbolic factorization.
- *        = FACTORED: On entry, L, U, perm_r and perm_c contain the 
- *              factored form of A. If DiagScale is not NOEQUIL, the matrix
- *              A has been equilibrated with scaling factors R and C.
- *
- * Equil  (yes_no_t)
- *        Specifies whether to equilibrate the system (scale A's row and
- *        columns to have unit norm).
- *
- * ColPerm (colperm_t)
- *        Specifies what type of column permutation to use to reduce fill.
- *        = NATURAL: use the natural ordering 
- *        = MMD_ATA: use minimum degree ordering on structure of A'*A
- *        = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
- *        = COLAMD: use approximate minimum degree column ordering
- *        = MY_PERMC: use the ordering specified in ScalePermstruct->perm_c[]
- *         
- * Trans  (trans_t)
- *        Specifies the form of the system of equations:
- *        = NOTRANS: A * X = B        (No transpose)
- *        = TRANS:   A**T * X = B     (Transpose)
- *        = CONJ:    A**H * X = B     (Transpose)
- *
- * IterRefine (IterRefine_t)
- *        Specifies whether to perform iterative refinement.
- *        = NO: no iterative refinement
- *        = WorkingPrec: perform iterative refinement in working precision
- *        = ExtraPrec: perform iterative refinement in extra precision
- *
- * PrintStat (yes_no_t)
- *        Specifies whether to print the solver's statistics.
- *
- * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
- *        Specifies the threshold used for a diagonal entry to be an
- *        acceptable pivot.
- *
- * PivotGrowth (yes_no_t)
- *        Specifies whether to compute the reciprocal pivot growth.
- *
- * ConditionNumber (ues_no_t)
- *        Specifies whether to compute the reciprocal condition number.
- *
- * RowPerm (rowperm_t) (only for SuperLU_DIST)
- *        Specifies whether to permute rows of the original matrix.
- *        = NO: not to permute the rows
- *        = LargeDiag: make the diagonal large relative to the off-diagonal
- *        = MY_PERMR: use the permutation given in ScalePermstruct->perm_r[]
- *           
- * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
- *        Specifies whether to replace the tiny diagonals by
- *        sqrt(epsilon)*||A|| during LU factorization.
- *
- * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
- *        Specifies whether the initialization has been performed to the
- *        triangular solve.
- *
- * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
- *        Specifies whether the initialization has been performed to the
- *        sparse matrix-vector multiplication routine needed in iterative
- *        refinement.
- */
-typedef struct {
-    fact_t        Fact;
-    yes_no_t      Equil;
-    colperm_t     ColPerm;
-    trans_t       Trans;
-    IterRefine_t  IterRefine;
-    yes_no_t      PrintStat;
-    yes_no_t      SymmetricMode;
-    double        DiagPivotThresh;
-    yes_no_t      PivotGrowth;
-    yes_no_t      ConditionNumber;
-    rowperm_t     RowPerm;
-    yes_no_t      ReplaceTinyPivot;
-    yes_no_t      SolveInitialized;
-    yes_no_t      RefineInitialized;
-} superlu_options_t;
-typedef struct {
-    int     *panel_histo; /* histogram of panel size distribution */
-    double  *utime;       /* running time at various phases */
-    flops_t *ops;         /* operation count at various phases */
-    int     TinyPivots;   /* number of tiny pivots */
-    int     RefineSteps;  /* number of iterative refinement steps */
-} SuperLUStat_t;
- * Prototypes
- ***********************************************************************/
-#ifdef __cplusplus
-extern "C" {
-extern void    Destroy_SuperMatrix_Store(SuperMatrix *);
-extern void    Destroy_CompCol_Matrix(SuperMatrix *);
-extern void    Destroy_CompRow_Matrix(SuperMatrix *);
-extern void    Destroy_SuperNode_Matrix(SuperMatrix *);
-extern void    Destroy_CompCol_Permuted(SuperMatrix *);
-extern void    Destroy_Dense_Matrix(SuperMatrix *);
-extern void    get_perm_c(int, SuperMatrix *, int *);
-extern void    set_default_options(superlu_options_t *options);
-extern void    sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
-                           SuperMatrix*);
-extern void    superlu_abort_and_exit(char*);
-extern void    *superlu_malloc (size_t);
-extern int     *intMalloc (int);
-extern int     *intCalloc (int);
-extern void    superlu_free (void*);
-extern void    SetIWork (int, int, int, int *, int **, int **, int **,
-                         int **, int **, int **, int **);
-extern int     sp_coletree (int *, int *, int *, int, int, int *);
-extern void    relax_snode (const int, int *, const int, int *, int *);
-extern void    heap_relax_snode (const int, int *, const int, int *, int *);
-extern void    resetrep_col (const int, const int *, int *);
-extern int     spcoletree (int *, int *, int *, int, int, int *);
-extern int     *TreePostorder (int, int *);
-extern double  SuperLU_timer_ ();
-extern int     sp_ienv (int);
-extern int     lsame_ (char *, char *);
-extern int     xerbla_ (char *, int *);
-extern void    ifill (int *, int, int);
-extern void    snode_profile (int, int *);
-extern void    super_stats (int, int *);
-extern void    PrintSumm (char *, int, int, int);
-extern void    StatInit(SuperLUStat_t *);
-extern void    StatPrint (SuperLUStat_t *);
-extern void    StatFree(SuperLUStat_t *);
-extern void    print_panel_seg(int, int, int, int, int *, int *);
-extern void    check_repfnz(int, int, int, int *);
-#ifdef __cplusplus
-  }
-#endif /* __SUPERLU_UTIL */
diff --git a/superlu/slu_zdefs.h b/superlu/slu_zdefs.h
deleted file mode 100644
index 2659d6a4..00000000
--- a/superlu/slu_zdefs.h
+++ /dev/null
@@ -1,246 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#ifndef __SUPERLU_zSP_DEFS /* allow multiple inclusions */
-#define __SUPERLU_zSP_DEFS
- * File name:          zsp_defs.h
- * Purpose:             Sparse matrix types and function prototypes
- * History:
- */
-#ifdef _CRAY
-#include <fortran.h>
-#include <string.h>
-/* Define my integer type int_t */
-typedef int int_t; /* default */
-#include "slu_Cnames.h"
-#include "supermatrix.h"
-#include "slu_util.h"
-#include "slu_dcomplex.h"
- * Global data structures used in LU factorization -
- * 
- *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
- *   (xsup,supno): supno[i] is the supernode no to which i belongs;
- *     xsup(s) points to the beginning of the s-th supernode.
- *     e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
- *             xsup 0 1 2 4 7 12
- *     Note: dfs will be performed on supernode rep. relative to the new 
- *           row pivoting ordering
- *
- *   (xlsub,lsub): lsub[*] contains the compressed subscript of
- *     rectangular supernodes; xlsub[j] points to the starting
- *     location of the j-th column in lsub[*]. Note that xlsub 
- *     is indexed by column.
- *     Storage: original row subscripts
- *
- *      During the course of sparse LU factorization, we also use
- *     (xlsub,lsub) for the purpose of symmetric pruning. For each
- *     supernode {s,s+1,...,t=s+r} with first column s and last
- *     column t, the subscript set
- *             lsub[j], j=xlsub[s], .., xlsub[s+1]-1
- *     is the structure of column s (i.e. structure of this supernode).
- *     It is used for the storage of numerical values.
- *     Furthermore,
- *             lsub[j], j=xlsub[t], .., xlsub[t+1]-1
- *     is the structure of the last column t of this supernode.
- *     It is for the purpose of symmetric pruning. Therefore, the
- *     structural subscripts can be rearranged without making physical
- *     interchanges among the numerical values.
- *
- *     However, if the supernode has only one column, then we
- *     only keep one set of subscripts. For any subscript interchange
- *     performed, similar interchange must be done on the numerical
- *     values.
- *
- *     The last column structures (for pruning) will be removed
- *     after the numercial LU factorization phase.
- *
- *   (xlusup,lusup): lusup[*] contains the numerical values of the
- *     rectangular supernodes; xlusup[j] points to the starting
- *     location of the j-th column in storage vector lusup[*]
- *     Note: xlusup is indexed by column.
- *     Each rectangular supernode is stored by column-major
- *     scheme, consistent with Fortran 2-dim array storage.
- *
- *   (xusub,ucol,usub): ucol[*] stores the numerical values of
- *     U-columns outside the rectangular supernodes. The row
- *     subscript of nonzero ucol[k] is stored in usub[k].
- *     xusub[i] points to the starting location of column i in ucol.
- *     Storage: new row subscripts; that is subscripts of PA.
- */
-typedef struct {
-    int     *xsup;    /* supernode and column mapping */
-    int     *supno;   
-    int     *lsub;    /* compressed L subscripts */
-    int            *xlsub;
-    doublecomplex  *lusup;   /* L supernodes */
-    int     *xlusup;
-    doublecomplex  *ucol;    /* U columns */
-    int     *usub;
-    int            *xusub;
-    int     nzlmax;   /* current max size of lsub */
-    int     nzumax;   /*    "    "    "      ucol */
-    int     nzlumax;  /*    "    "    "     lusup */
-    int     n;        /* number of columns in the matrix */
-    LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
-} GlobalLU_t;
-typedef struct {
-    float for_lu;
-    float total_needed;
-    int   expansions;
-} mem_usage_t;
-#ifdef __cplusplus
-extern "C" {
-/* Driver routines */
-extern void
-zgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
-      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
-extern void
-zgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
-       char *, double *, double *, SuperMatrix *, SuperMatrix *,
-       void *, int, SuperMatrix *, SuperMatrix *,
-       double *, double *, double *, double *,
-       mem_usage_t *, SuperLUStat_t *, int *);
-/* Supernodal LU factor related */
-extern void
-zCreate_CompCol_Matrix(SuperMatrix *, int, int, int, doublecomplex *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-zCreate_CompRow_Matrix(SuperMatrix *, int, int, int, doublecomplex *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-zCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
-extern void
-zCreate_Dense_Matrix(SuperMatrix *, int, int, doublecomplex *, int,
-                    Stype_t, Dtype_t, Mtype_t);
-extern void
-zCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, doublecomplex *, 
-                        int *, int *, int *, int *, int *,
-                        Stype_t, Dtype_t, Mtype_t);
-extern void
-zCopy_Dense_Matrix(int, int, doublecomplex *, int, doublecomplex *, int);
-  // extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
-  // extern void    fixupL (const int, const int *, GlobalLU_t *);
-extern void    zallocateA (int, int, doublecomplex **, int **, int **);
-extern void    zgstrf (superlu_options_t*, SuperMatrix*, double, 
-                       int, int, int*, void *, int, int *, int *, 
-                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
-extern int     zsnode_dfs (const int, const int, const int *, const int *,
-                            const int *, int *, int *, GlobalLU_t *);
-extern int     zsnode_bmod (const int, const int, const int, doublecomplex *,
-                              doublecomplex *, GlobalLU_t *, SuperLUStat_t*);
-extern void    zpanel_dfs (const int, const int, const int, SuperMatrix *,
-                          int *, int *, doublecomplex *, int *, int *, int *,
-                          int *, int *, int *, int *, GlobalLU_t *);
-extern void    zpanel_bmod (const int, const int, const int, const int,
-                           doublecomplex *, doublecomplex *, int *, int *,
-                          GlobalLU_t *, SuperLUStat_t*);
-extern int     zcolumn_dfs (const int, const int, int *, int *, int *, int *,
-                          int *, int *, int *, int *, int *, GlobalLU_t *);
-extern int     zcolumn_bmod (const int, const int, doublecomplex *,
-                          doublecomplex *, int *, int *, int,
-                           GlobalLU_t *, SuperLUStat_t*);
-extern int     zcopy_to_ucol (int, int, int *, int *, int *,
-                              doublecomplex *, GlobalLU_t *);         
-extern int     zpivotL (const int, const double, int *, int *, 
-                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
-extern void    zpruneL (const int, const int *, const int, const int,
-                         const int *, const int *, int *, GlobalLU_t *);
-extern void    zreadmt (int *, int *, int *, doublecomplex **, int **, int **);
-extern void    zGenXtrue (int, int, doublecomplex *, int);
-extern void    zFillRHS (trans_t, int, doublecomplex *, int, SuperMatrix *,
-                         SuperMatrix *);
-extern void    zgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
-                        SuperMatrix *, SuperLUStat_t*, int *);
-/* Driver related */
-extern void    zgsequ (SuperMatrix *, double *, double *, double *,
-                       double *, double *, int *);
-extern void    zlaqgs (SuperMatrix *, double *, double *, double,
-                        double, double, char *);
-extern void    zgscon (char *, SuperMatrix *, SuperMatrix *, 
-                        double, double *, SuperLUStat_t*, int *);
-extern double   zPivotGrowth(int, SuperMatrix *, int *, 
-                            SuperMatrix *, SuperMatrix *);
-extern void    zgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
-                       SuperMatrix *, int *, int *, char *, double *, 
-                       double *, SuperMatrix *, SuperMatrix *,
-                       double *, double *, SuperLUStat_t*, int *);
-extern int     sp_ztrsv (char *, char *, char *, SuperMatrix *,
-                       SuperMatrix *, doublecomplex *, SuperLUStat_t*, int *);
-extern int     sp_zgemv (char *, doublecomplex, SuperMatrix *, doublecomplex *,
-                       int, doublecomplex, doublecomplex *, int);
-extern int     sp_zgemm (char *, char *, int, int, int, doublecomplex,
-                       SuperMatrix *, doublecomplex *, int, doublecomplex, 
-                       doublecomplex *, int);
-/* Memory-related */
-extern int     zLUMemInit (fact_t, void *, int, int, int, int, int,
-                            SuperMatrix *, SuperMatrix *,
-                            GlobalLU_t *, int **, doublecomplex **);
-extern void    zSetRWork (int, int, doublecomplex *, doublecomplex **, doublecomplex **);
-extern void    zLUWorkFree (int *, doublecomplex *, GlobalLU_t *);
-extern int     zLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
-extern doublecomplex  *doublecomplexMalloc(int);
-extern doublecomplex  *doublecomplexCalloc(int);
-extern double  *doubleMalloc(int);
-extern double  *doubleCalloc(int);
-extern int     zmemory_usage(const int, const int, const int, const int);
-extern int     zQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
-/* Auxiliary routines */
-extern void    zreadhb(int *, int *, int *, doublecomplex **, int **, int **);
-extern void    zCompRow_to_CompCol(int, int, int, doublecomplex*, int*, int*,
-                                  doublecomplex **, int **, int **);
-extern void    zfill (doublecomplex *, int, doublecomplex);
-extern void    zinf_norm_error (int, SuperMatrix *, doublecomplex *);
-  // extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
-  //                    doublecomplex, doublecomplex, doublecomplex *, doublecomplex *, char *);
-/* Routines for debugging */
-extern void    zPrint_CompCol_Matrix(char *, SuperMatrix *);
-extern void    zPrint_SuperNode_Matrix(char *, SuperMatrix *);
-extern void    zPrint_Dense_Matrix(char *, SuperMatrix *);
-// extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
-// extern void    check_tempv(int, doublecomplex *);
-#ifdef __cplusplus
-  }
-#endif /* __SUPERLU_zSP_DEFS */
diff --git a/superlu/smemory.c b/superlu/smemory.c
deleted file mode 100644
index 0f2fbda4..00000000
--- a/superlu/smemory.c
+++ /dev/null
@@ -1,689 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_sdefs.h"
-/* Constants */
-#define NO_MEMTYPE  4      /* 0: lusup;
-                             1: ucol;
-                             2: lsub;
-                             3: usub */
-#define GluIntArray(n)   (5 * (n) + 5)
-/* Internal prototypes */
-void  *sexpand (int *, MemType,int, int, GlobalLU_t *);
-int   sLUWorkInit (int, int, int, int **, float **, LU_space_t);
-void  copy_mem_float (int, void *, void *);
-void  sStackCompress (GlobalLU_t *);
-void  sSetupSpace (void *, int, LU_space_t *);
-void  *suser_malloc (int, int);
-void  suser_free (int, int);
-/* External prototypes (in memory.c - prec-indep) */
-extern void    copy_mem_int    (int, void *, void *);
-extern void    user_bcopy      (char *, char *, int);
-/* Headers for 4 types of dynamatically managed memory */
-typedef struct e_node {
-    int size;      /* length of the memory that has been used */
-    void *mem;     /* pointer to the new malloc'd store */
-} ExpHeader;
-typedef struct {
-    int  size;
-    int  used;
-    int  top1;  /* grow upward, relative to &array[0] */
-    int  top2;  /* grow downward */
-    void *array;
-} LU_stack_t;
-/* Variables local to this file */
-static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
-static LU_stack_t stack;
-static int no_expand;
-/* Macros to manipulate stack */
-#define StackFull(x)         ( x + stack.used >= stack.size )
-#define NotDoubleAlign(addr) ( (long int)addr & 7 )
-#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
-#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
-                             (w + 1) * m * sizeof(float) )
-#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
- * Setup the memory model to be used for factorization.
- *    lwork = 0: use system malloc;
- *    lwork > 0: use user-supplied work[] space.
- */
-void sSetupSpace(void *work, int lwork, LU_space_t *MemModel)
-    if ( lwork == 0 ) {
-       *MemModel = SYSTEM; /* malloc/free */
-    } else if ( lwork > 0 ) {
-       *MemModel = USER;   /* user provided space */
-       stack.used = 0;
-       stack.top1 = 0;
-       stack.top2 = (lwork/4)*4; /* must be word addressable */
-       stack.size = stack.top2;
-       stack.array = (void *) work;
-    }
-void *suser_malloc(int bytes, int which_end)
-    void *buf;
-    if ( StackFull(bytes) ) return (NULL);
-    if ( which_end == HEAD ) {
-       buf = (char*) stack.array + stack.top1;
-       stack.top1 += bytes;
-    } else {
-       stack.top2 -= bytes;
-       buf = (char*) stack.array + stack.top2;
-    }
-    stack.used += bytes;
-    return buf;
-void suser_free(int bytes, int which_end)
-    if ( which_end == HEAD ) {
-       stack.top1 -= bytes;
-    } else {
-       stack.top2 += bytes;
-    }
-    stack.used -= bytes;
- * mem_usage consists of the following fields:
- *    - for_lu (float)
- *      The amount of space used in bytes for the L\U data structures.
- *    - total_needed (float)
- *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
- */
-int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
-    SCformat *Lstore;
-    NCformat *Ustore;
-    register int n, iword, dword, panel_size = sp_ienv(1);
-    Lstore = L->Store;
-    Ustore = U->Store;
-    n = L->ncol;
-    iword = sizeof(int);
-    dword = sizeof(float);
-    /* For LU factors */
-    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
-                                dword + Lstore->rowind_colptr[n] * iword );
-    mem_usage->for_lu += (float)( (n + 1) * iword +
-                                Ustore->colptr[n] * (dword + iword) );
-    /* Working storage to support factorization */
-    mem_usage->total_needed = mem_usage->for_lu +
-       (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
-               (panel_size + 1) * n * dword );
-    mem_usage->expansions = --no_expand;
-    return 0;
-} /* sQuerySpace */
- * Allocate storage for the data structures common to all factor routines.
- * For those unpredictable size, make a guess as FILL * nnz(A).
- * Return value:
- *     If lwork = -1, return the estimated amount of space required, plus n;
- *     otherwise, return the amount of space actually allocated when
- *     memory allocation failure occurred.
- */
-sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
-         int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
-         int **iwork, float **dwork)
-    int      info, iword, dword;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    int      *xsup, *supno;
-    int      *lsub, *xlsub;
-    float   *lusup;
-    int      *xlusup;
-    float   *ucol;
-    int      *usub, *xusub;
-    int      nzlmax, nzumax, nzlumax;
-    int      FILL = sp_ienv(6);
-    Glu->n    = n;
-    no_expand = 0;
-    iword     = sizeof(int);
-    dword     = sizeof(float);
-    if ( !expanders )  
-        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
-    if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
-    if ( fact != SamePattern_SameRowPerm ) {
-       /* Guess for L\U factors */
-       nzumax = nzlumax = FILL * annz;
-       nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else {
-           sSetupSpace(work, lwork, &Glu->MemModel);
-       }
-#if ( PRNTlevel >= 1 )
-       printf("sLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
-              FILL, nzlmax, nzumax);
-       fflush(stdout);
-       /* Integer pointers for L\U factors */
-       if ( Glu->MemModel == SYSTEM ) {
-           xsup   = intMalloc(n+1);
-           supno  = intMalloc(n+1);
-           xlsub  = intMalloc(n+1);
-           xlusup = intMalloc(n+1);
-           xusub  = intMalloc(n+1);
-       } else {
-           xsup   = (int *)suser_malloc((n+1) * iword, HEAD);
-           supno  = (int *)suser_malloc((n+1) * iword, HEAD);
-           xlsub  = (int *)suser_malloc((n+1) * iword, HEAD);
-           xlusup = (int *)suser_malloc((n+1) * iword, HEAD);
-           xusub  = (int *)suser_malloc((n+1) * iword, HEAD);
-       }
-       lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
-       ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
-       lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
-       usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );
-       while ( !lusup || !ucol || !lsub || !usub ) {
-           if ( Glu->MemModel == SYSTEM ) {
-               SUPERLU_FREE(lusup); 
-               SUPERLU_FREE(ucol); 
-               SUPERLU_FREE(lsub); 
-               SUPERLU_FREE(usub);
-           } else {
-               suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
-           }
-           nzlumax /= 2;
-           nzumax /= 2;
-           nzlmax /= 2;
-           if ( nzlumax < annz ) {
-               printf("Not enough memory to perform factorization.\n");
-               return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-           }
-#if ( PRNTlevel >= 1)
-           printf("sLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
-                  nzlmax, nzumax);
-           fflush(stdout);
-           lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
-           ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
-           lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
-           usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );
-       }
-    } else {
-       /* fact == SamePattern_SameRowPerm */
-       Lstore   = L->Store;
-       Ustore   = U->Store;
-       xsup     = Lstore->sup_to_col;
-       supno    = Lstore->col_to_sup;
-       xlsub    = Lstore->rowind_colptr;
-       xlusup   = Lstore->nzval_colptr;
-       xusub    = Ustore->colptr;
-       nzlmax   = Glu->nzlmax;    /* max from previous factorization */
-       nzumax   = Glu->nzumax;
-       nzlumax  = Glu->nzlumax;
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else if ( lwork == 0 ) {
-           Glu->MemModel = SYSTEM;
-       } else {
-           Glu->MemModel = USER;
-           stack.top2 = (lwork/4)*4; /* must be word-addressable */
-           stack.size = stack.top2;
-       }
-       lsub  = expanders[LSUB].mem  = Lstore->rowind;
-       lusup = expanders[LUSUP].mem = Lstore->nzval;
-       usub  = expanders[USUB].mem  = Ustore->rowind;
-       ucol  = expanders[UCOL].mem  = Ustore->nzval;;
-       expanders[LSUB].size         = nzlmax;
-       expanders[LUSUP].size        = nzlumax;
-       expanders[USUB].size         = nzumax;
-       expanders[UCOL].size         = nzumax;  
-    }
-    Glu->xsup    = xsup;
-    Glu->supno   = supno;
-    Glu->lsub    = lsub;
-    Glu->xlsub   = xlsub;
-    Glu->lusup   = lusup;
-    Glu->xlusup  = xlusup;
-    Glu->ucol    = ucol;
-    Glu->usub    = usub;
-    Glu->xusub   = xusub;
-    Glu->nzlmax  = nzlmax;
-    Glu->nzumax  = nzumax;
-    Glu->nzlumax = nzlumax;
-    info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
-    if ( info )
-       return ( info + smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-    ++no_expand;
-    return 0;
-} /* sLUMemInit */
-/* Allocate known working storage. Returns 0 if success, otherwise
-   returns the number of bytes allocated so far when failure occurred. */
-sLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
-            float **dworkptr, LU_space_t MemModel)
-    int    isize, dsize, extra;
-    float *old_ptr;
-    int    maxsuper = sp_ienv(3),
-           rowblk   = sp_ienv(4);
-    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
-    dsize = (m * panel_size +
-            NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
-    if ( MemModel == SYSTEM ) 
-       *iworkptr = (int *) intCalloc(isize/sizeof(int));
-    else
-       *iworkptr = (int *) suser_malloc(isize, TAIL);
-    if ( ! *iworkptr ) {
-       fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n");
-       return (isize + n);
-    }
-    if ( MemModel == SYSTEM )
-       *dworkptr = (float *) SUPERLU_MALLOC(dsize);
-    else {
-       *dworkptr = (float *) suser_malloc(dsize, TAIL);
-       if ( NotDoubleAlign(*dworkptr) ) {
-           old_ptr = *dworkptr;
-           *dworkptr = (float*) DoubleAlign(*dworkptr);
-           *dworkptr = (float*) ((double*)*dworkptr - 1);
-           extra = (char*)old_ptr - (char*)*dworkptr;
-#ifdef DEBUG       
-           printf("sLUWorkInit: not aligned, extra %d\n", extra);
-           stack.top2 -= extra;
-           stack.used += extra;
-       }
-    }
-    if ( ! *dworkptr ) {
-       fprintf(stderr, "malloc fails for local dworkptr[].");
-       return (isize + dsize + n);
-    }
-    return 0;
- * Set up pointers for real working arrays.
- */
-sSetRWork(int m, int panel_size, float *dworkptr,
-        float **dense, float **tempv)
-    float zero = 0.0;
-    int maxsuper = sp_ienv(3),
-        rowblk   = sp_ienv(4);
-    *dense = dworkptr;
-    *tempv = *dense + panel_size*m;
-    sfill (*dense, m * panel_size, zero);
-    sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
- * Free the working storage used by factor routines.
- */
-void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
-    if ( Glu->MemModel == SYSTEM ) {
-       SUPERLU_FREE (iwork);
-       SUPERLU_FREE (dwork);
-    } else {
-       stack.used -= (stack.size - stack.top2);
-       stack.top2 = stack.size;
-/*     sStackCompress(Glu);  */
-    }
-    SUPERLU_FREE (expanders);  
-    expanders = 0;
-/* Expand the data structures for L and U during the factorization.
- * Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-sLUMemXpand(int jcol,
-          int next,          /* number of elements currently in the factors */
-          MemType mem_type,  /* which type of memory to expand  */
-          int *maxlen,       /* modified - maximum length of a data structure 
-          GlobalLU_t *Glu    /* modified - global LU data structures */
-          )
-    void   *new_mem;
-#ifdef DEBUG    
-    printf("sLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
-          jcol, next, *maxlen, mem_type);
-    if (mem_type == USUB) 
-       new_mem = sexpand(maxlen, mem_type, next, 1, Glu);
-    else
-       new_mem = sexpand(maxlen, mem_type, next, 0, Glu);
-    if ( !new_mem ) {
-       int    nzlmax  = Glu->nzlmax;
-       int    nzumax  = Glu->nzumax;
-       int    nzlumax = Glu->nzlumax;
-       fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
-       return (smemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
-    }
-    switch ( mem_type ) {
-      case LUSUP:
-       Glu->lusup   = (float *) new_mem;
-       Glu->nzlumax = *maxlen;
-       break;
-      case UCOL:
-       Glu->ucol   = (float *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-      case LSUB:
-       Glu->lsub   = (int *) new_mem;
-       Glu->nzlmax = *maxlen;
-       break;
-      case USUB:
-       Glu->usub   = (int *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-    }
-    return 0;
-copy_mem_float(int howmany, void *old, void *new)
-    register int i;
-    float *dold = old;
-    float *dnew = new;
-    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
- * Expand the existing storage to accommodate more fill-ins.
- */
-*sexpand (
-        int *prev_len,   /* length used from previous call */
-        MemType type,    /* which part of the memory to expand */
-        int len_to_copy, /* size of the memory to be copied to new store */
-        int keep_prev,   /* = 1: use prev_len;
-                            = 0: compute new_len to expand */
-        GlobalLU_t *Glu  /* modified - global LU data structures */
-       )
-    float    EXPAND = 1.5;
-    float    alpha;
-    void     *new_mem, *old_mem;
-    int      new_len, tries, lword, extra, bytes_to_copy;
-    alpha = EXPAND;
-    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
-        new_len = *prev_len;
-    else {
-       new_len = alpha * *prev_len;
-    }
-    if ( type == LSUB || type == USUB ) lword = sizeof(int);
-    else lword = sizeof(float);
-    if ( Glu->MemModel == SYSTEM ) {
-       new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-       if ( no_expand != 0 ) {
-           tries = 0;
-           if ( keep_prev ) {
-               if ( !new_mem ) return (NULL);
-           } else {
-               while ( !new_mem ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-               }
-           }
-           if ( type == LSUB || type == USUB ) {
-               copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
-           } else {
-               copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
-           }
-           SUPERLU_FREE (expanders[type].mem);
-       }
-       expanders[type].mem = (void *) new_mem;
-    } else { /* MemModel == USER */
-       if ( no_expand == 0 ) {
-           new_mem = suser_malloc(new_len * lword, HEAD);
-           if ( NotDoubleAlign(new_mem) &&
-               (type == LUSUP || type == UCOL) ) {
-               old_mem = new_mem;
-               new_mem = (void *)DoubleAlign(new_mem);
-               extra = (char*)new_mem - (char*)old_mem;
-#ifdef DEBUG           
-               printf("expand(): not aligned, extra %d\n", extra);
-               stack.top1 += extra;
-               stack.used += extra;
-           }
-           expanders[type].mem = (void *) new_mem;
-       }
-       else {
-           tries = 0;
-           extra = (new_len - *prev_len) * lword;
-           if ( keep_prev ) {
-               if ( StackFull(extra) ) return (NULL);
-           } else {
-               while ( StackFull(extra) ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   extra = (new_len - *prev_len) * lword;          
-               }
-           }
-           if ( type != USUB ) {
-               new_mem = (void*)((char*)expanders[type + 1].mem + extra);
-               bytes_to_copy = (char*)stack.array + stack.top1
-                   - (char*)expanders[type + 1].mem;
-               user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
-               if ( type < USUB ) {
-                   Glu->usub = expanders[USUB].mem =
-                       (void*)((char*)expanders[USUB].mem + extra);
-               }
-               if ( type < LSUB ) {
-                   Glu->lsub = expanders[LSUB].mem =
-                       (void*)((char*)expanders[LSUB].mem + extra);
-               }
-               if ( type < UCOL ) {
-                   Glu->ucol = expanders[UCOL].mem =
-                       (void*)((char*)expanders[UCOL].mem + extra);
-               }
-               stack.top1 += extra;
-               stack.used += extra;
-               if ( type == UCOL ) {
-                   stack.top1 += extra;   /* Add same amount for USUB */
-                   stack.used += extra;
-               }
-           } /* if ... */
-       } /* else ... */
-    }
-    expanders[type].size = new_len;
-    *prev_len = new_len;
-    if ( no_expand ) ++no_expand;
-    return (void *) expanders[type].mem;
-} /* sexpand */
- * Compress the work[] array to remove fragmentation.
- */
-sStackCompress(GlobalLU_t *Glu)
-    register int iword, dword, ndim;
-    char    *last, *fragment;
-    int      *ifrom, *ito;
-    float   *dfrom, *dto;
-    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
-    float   *ucol, *lusup;
-    iword = sizeof(int);
-    dword = sizeof(float);
-    ndim = Glu->n;
-    xlsub  = Glu->xlsub;
-    lsub   = Glu->lsub;
-    xusub  = Glu->xusub;
-    usub   = Glu->usub;
-    xlusup = Glu->xlusup;
-    ucol   = Glu->ucol;
-    lusup  = Glu->lusup;
-    dfrom = ucol;
-    dto = (float *)((char*)lusup + xlusup[ndim] * dword);
-    copy_mem_float(xusub[ndim], dfrom, dto);
-    ucol = dto;
-    ifrom = lsub;
-    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
-    copy_mem_int(xlsub[ndim], ifrom, ito);
-    lsub = ito;
-    ifrom = usub;
-    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
-    copy_mem_int(xusub[ndim], ifrom, ito);
-    usub = ito;
-    last = (char*)usub + xusub[ndim] * iword;
-    fragment = (char*) (((char*)stack.array + stack.top1) - last);
-    stack.used -= (long int) fragment;
-    stack.top1 -= (long int) fragment;
-    Glu->ucol = ucol;
-    Glu->lsub = lsub;
-    Glu->usub = usub;
-#ifdef DEBUG
-    printf("sStackCompress: fragment %d\n", fragment);
-    /* for (last = 0; last < ndim; ++last)
-       print_lu_col("After compress:", last, 0);*/
- * Allocate storage for original matrix A
- */
-sallocateA(int n, int nnz, float **a, int **asub, int **xa)
-    *a    = (float *) floatMalloc(nnz);
-    *asub = (int *) intMalloc(nnz);
-    *xa   = (int *) intMalloc(n+1);
-float *floatMalloc(int n)
-    float *buf;
-    buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float)); 
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
-    }
-    return (buf);
-float *floatCalloc(int n)
-    float *buf;
-    register int i;
-    float zero = 0.0;
-    buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
-    }
-    for (i = 0; i < n; ++i) buf[i] = zero;
-    return (buf);
-int smemory_usage(const int nzlmax, const int nzumax, 
-                 const int nzlumax, const int n)
-    register int iword, dword;
-    iword   = sizeof(int);
-    dword   = sizeof(float);
-    return (10 * n * iword +
-           nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
diff --git a/superlu/smyblas2.c b/superlu/smyblas2.c
deleted file mode 100644
index 8e9bb09a..00000000
--- a/superlu/smyblas2.c
+++ /dev/null
@@ -1,245 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:          smyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void slsolve ( int ldm, int ncol, float *M, float *rhs )
-    int k;
-    float x0, x1, x2, x3, x4, x5, x6, x7;
-    float *M0;
-    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-    M0 = &M[0];
-    while ( firstcol < ncol - 7 ) { /* Do 8 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-      Mki4 = Mki3 + ldm + 1;
-      Mki5 = Mki4 + ldm + 1;
-      Mki6 = Mki5 + ldm + 1;
-      Mki7 = Mki6 + ldm + 1;
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-      x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++;
-      x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++;
-      x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
-      x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
-                          - x6 * *Mki6++;
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      rhs[++firstcol] = x4;
-      rhs[++firstcol] = x5;
-      rhs[++firstcol] = x6;
-      rhs[++firstcol] = x7;
-      ++firstcol;
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-                       - x2 * *Mki2++ - x3 * *Mki3++
-                        - x4 * *Mki4++ - x5 * *Mki5++
-                       - x6 * *Mki6++ - x7 * *Mki7++;
-      M0 += 8 * ldm + 8;
-    }
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      ++firstcol;
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-                       - x2 * *Mki2++ - x3 * *Mki3++;
-      M0 += 4 * ldm + 4;
-    }
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      rhs[++firstcol] = x1;
-      ++firstcol;
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
-    }
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-susolve ( ldm, ncol, M, rhs )
-int ldm;       /* in */
-int ncol;      /* in */
-float *M;      /* in */
-float *rhs;    /* modified */
-    float xj;
-    int jcol, j, irow;
-    jcol = ncol - 1;
-    for (j = 0; j < ncol; j++) {
-       xj = rhs[jcol] / M[jcol + jcol*ldm];            /* M(jcol, jcol) */
-       rhs[jcol] = xj;
-       for (irow = 0; irow < jcol; irow++)
-           rhs[irow] -= xj * M[irow + jcol*ldm];       /* M(irow, jcol) */
-       jcol--;
-    }
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm;       /* in -- leading dimension of M */
-int nrow;      /* in */ 
-int ncol;      /* in */
-float *M;      /* in */
-float *vec;    /* in */
-float *Mxvec;  /* in/out */
-    float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
-    float *M0;
-    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-    int k;
-    M0 = &M[0];
-    while ( firstcol < ncol - 7 ) {    /* Do 8 columns */
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-        Mki2 = Mki1 + ldm;
-        Mki3 = Mki2 + ldm;
-       Mki4 = Mki3 + ldm;
-       Mki5 = Mki4 + ldm;
-       Mki6 = Mki5 + ldm;
-       Mki7 = Mki6 + ldm;
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       vi4 = vec[firstcol++];
-       vi5 = vec[firstcol++];
-       vi6 = vec[firstcol++];
-       vi7 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) 
-           Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-                     + vi2 * *Mki2++ + vi3 * *Mki3++ 
-                     + vi4 * *Mki4++ + vi5 * *Mki5++
-                     + vi6 * *Mki6++ + vi7 * *Mki7++;
-       M0 += 8 * ldm;
-    }
-    while ( firstcol < ncol - 3 ) {    /* Do 4 columns */
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-       Mki2 = Mki1 + ldm;
-       Mki3 = Mki2 + ldm;
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) 
-           Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-                     + vi2 * *Mki2++ + vi3 * *Mki3++ ;
-       M0 += 4 * ldm;
-    }
-    while ( firstcol < ncol ) {                /* Do 1 column */
-       Mki0 = M0;
-       vi0 = vec[firstcol++];
-       for (k = 0; k < nrow; k++)
-           Mxvec[k] += vi0 * *Mki0++;
-       M0 += ldm;
-    }
diff --git a/superlu/sp_coletree.c b/superlu/sp_coletree.c
deleted file mode 100644
index 48487085..00000000
--- a/superlu/sp_coletree.c
+++ /dev/null
@@ -1,354 +0,0 @@
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-/*  Elimination tree computation and layout routines */
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
- *  Implementation of disjoint set union routines.
- *  Elements are integers in 0..n-1, and the 
- *  names of the sets themselves are of type int.
- *  
- *  Calls are:
- *  initialize_disjoint_sets (n) initial call.
- *  s = make_set (i)             returns a set containing only i.
- *  s = link (t, u)             returns s = t union u, destroying t and u.
- *  s = find (i)                return name of set containing i.
- *  finalize_disjoint_sets      final call.
- *
- *  This implementation uses path compression but not weighted union.
- *  See Tarjan's book for details.
- *  John Gilbert, CMI, 1987.
- *
- *  Implemented path-halving by XSL 07/05/95.
- */
-static int     *pp;            /* parent array for sets */
-int *mxCallocInt(int n)
-    register int i;
-    int *buf;
-    buf = (int *) SUPERLU_MALLOC( n * sizeof(int) );
-    if ( !buf ) {
-         ABORT("SUPERLU_MALLOC fails for buf in mxCallocInt()");
-       }
-    for (i = 0; i < n; i++) buf[i] = 0;
-    return (buf);
-void initialize_disjoint_sets (
-       int n
-       )
-       pp = mxCallocInt(n);
-int make_set (
-       int i
-       )
-       pp[i] = i;
-       return i;
-int link (
-       int s,
-       int t
-       )
-       pp[s] = t;
-       return t;
-int find (int i)
-    register int p, gp;
-    p = pp[i];
-    gp = pp[p];
-    while (gp != p) {
-       pp[i] = gp;
-       i = gp;
-       p = pp[i];
-       gp = pp[p];
-    }
-    return (p);
-#if 0
-int find (
-       int i
-       )
-       if (pp[i] != i) 
-               pp[i] = find (pp[i]);
-       return pp[i];
-void finalize_disjoint_sets (
-       void
-       )
-       SUPERLU_FREE(pp);
- *      Find the elimination tree for A'*A.
- *      This uses something similar to Liu's algorithm. 
- *      It runs in time O(nz(A)*log n) and does not form A'*A.
- *
- *      Input:
- *        Sparse matrix A.  Numeric values are ignored, so any
- *        explicit zeros are treated as nonzero.
- *      Output:
- *        Integer array of parents representing the elimination
- *        tree of the symbolic product A'*A.  Each vertex is a
- *        column of A, and nc means a root of the elimination forest.
- *
- *      John R. Gilbert, Xerox, 10 Dec 1990
- *      Based on code by JRG dated 1987, 1988, and 1990.
- */
- * Nonsymmetric elimination tree
- */
-           int *acolst, int *acolend, /* column start and end past 1 */
-           int *arow,                 /* row indices of A */
-           int nr, int nc,            /* dimension of A */
-           int *parent                /* parent in elim tree */
-           )
-       int     *root;                  /* root of subtee of etree      */
-       int     *firstcol;              /* first nonzero col in each row*/
-       int     rset, cset;             
-       int     row, col;
-       int     rroot;
-       int     p;
-       root = mxCallocInt (nc);
-       initialize_disjoint_sets (nc);
-       /* Compute firstcol[row] = first nonzero column in row */
-       firstcol = mxCallocInt (nr);
-       for (row = 0; row < nr; firstcol[row++] = nc);
-       for (col = 0; col < nc; col++) 
-               for (p = acolst[col]; p < acolend[col]; p++) {
-                       row = arow[p];
-                       firstcol[row] = SUPERLU_MIN(firstcol[row], col);
-               }
-       /* Compute etree by Liu's algorithm for symmetric matrices,
-           except use (firstcol[r],c) in place of an edge (r,c) of A.
-          Thus each row clique in A'*A is replaced by a star
-          centered at its first vertex, which has the same fill. */
-       for (col = 0; col < nc; col++) {
-               cset = make_set (col);
-               root[cset] = col;
-               parent[col] = nc; /* Matlab */
-               for (p = acolst[col]; p < acolend[col]; p++) {
-                       row = firstcol[arow[p]];
-                       if (row >= col) continue;
-                       rset = find (row);
-                       rroot = root[rset];
-                       if (rroot != col) {
-                               parent[rroot] = col;
-                               cset = link (cset, rset);
-                               root[cset] = col;
-                       }
-               }
-       }
-       SUPERLU_FREE (root);
-       SUPERLU_FREE (firstcol);
-       finalize_disjoint_sets ();
-       return 0;
- *  q = TreePostorder (n, p);
- *
- *     Postorder a tree.
- *     Input:
- *       p is a vector of parent pointers for a forest whose
- *        vertices are the integers 0 to n-1; p[root]==n.
- *     Output:
- *       q is a vector indexed by 0..n-1 such that q[i] is the
- *       i-th vertex in a postorder numbering of the tree.
- *
- *        ( 2/7/95 modified by X.Li:
- *          q is a vector indexed by 0:n-1 such that vertex i is the
- *          q[i]-th vertex in a postorder numbering of the tree.
- *          That is, this is the inverse of the previous q. )
- *
- *     In the child structure, lower-numbered children are represented
- *     first, so that a tree which is already numbered in postorder
- *     will not have its order changed.
- *    
- *  Written by John Gilbert, Xerox, 10 Dec 1990.
- *  Based on code written by John Gilbert at CMI in 1987.
- */
-static int     *first_kid, *next_kid;  /* Linked list of children.     */
-static int     *post, postnum;
- * Depth-first search from vertex v.
- */
-void etdfs (
-       int     v
-       )
-       int     w;
-       for (w = first_kid[v]; w != -1; w = next_kid[w]) {
-               etdfs (w);
-       }
-       /* post[postnum++] = v; in Matlab */
-       post[v] = postnum++;    /* Modified by X.Li on 2/14/95 */
- * Post order a tree
- */
-int *TreePostorder(
-       int n,
-       int *parent
-       int     v, dad;
-       /* Allocate storage for working arrays and results      */
-       first_kid =     mxCallocInt (n+1);
-       next_kid  =     mxCallocInt (n+1);
-       post      =     mxCallocInt (n+1);
-       /* Set up structure describing children */
-       for (v = 0; v <= n; first_kid[v++] = -1);
-       for (v = n-1; v >= 0; v--) {
-               dad = parent[v];
-               next_kid[v] = first_kid[dad];
-               first_kid[dad] = v;
-       }
-       /* Depth-first search from dummy root vertex #n */
-       postnum = 0;
-       etdfs (n);
-       SUPERLU_FREE (first_kid);
-       SUPERLU_FREE (next_kid);
-       return post;
- *      p = spsymetree (A);
- *
- *      Find the elimination tree for symmetric matrix A.
- *      This uses Liu's algorithm, and runs in time O(nz*log n).
- *
- *      Input:
- *        Square sparse matrix A.  No check is made for symmetry;
- *        elements below and on the diagonal are ignored.
- *        Numeric values are ignored, so any explicit zeros are 
- *        treated as nonzero.
- *      Output:
- *        Integer array of parents representing the etree, with n
- *        meaning a root of the elimination forest.
- *      Note:  
- *        This routine uses only the upper triangle, while sparse
- *        Cholesky (as in spchol.c) uses only the lower.  Matlab's
- *        dense Cholesky uses only the upper.  This routine could
- *        be modified to use the lower triangle either by transposing
- *        the matrix or by traversing it by rows with auxiliary
- *        pointer and link arrays.
- *
- *      John R. Gilbert, Xerox, 10 Dec 1990
- *      Based on code by JRG dated 1987, 1988, and 1990.
- *      Modified by X.S. Li, November 1999.
- */
- * Symmetric elimination tree
- */
-           int *acolst, int *acolend, /* column starts and ends past 1 */
-           int *arow,            /* row indices of A */
-           int n,                /* dimension of A */
-           int *parent     /* parent in elim tree */
-           )
-       int     *root;              /* root of subtree of etree         */
-       int     rset, cset;             
-       int     row, col;
-       int     rroot;
-       int     p;
-       root = mxCallocInt (n);
-       initialize_disjoint_sets (n);
-       for (col = 0; col < n; col++) {
-               cset = make_set (col);
-               root[cset] = col;
-               parent[col] = n; /* Matlab */
-               for (p = acolst[col]; p < acolend[col]; p++) {
-                       row = arow[p];
-                       if (row >= col) continue;
-                       rset = find (row);
-                       rroot = root[rset];
-                       if (rroot != col) {
-                               parent[rroot] = col;
-                               cset = link (cset, rset);
-                               root[cset] = col;
-                       }
-               }
-       }
-       SUPERLU_FREE (root);
-       finalize_disjoint_sets ();
-       return 0;
-} /* SP_SYMETREE */
diff --git a/superlu/sp_ienv.c b/superlu/sp_ienv.c
deleted file mode 100644
index 66a854df..00000000
--- a/superlu/sp_ienv.c
+++ /dev/null
@@ -1,86 +0,0 @@
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:          sp_ienv.c
- * History:             Modified from lapack routine ILAENV
- */
-#include "slu_Cnames.h"
-extern void xerbla_();
-sp_ienv(int ispec)
-    Purpose   
-    =======   
-    sp_ienv() is inquired to choose machine-dependent parameters for the
-    local environment. See ISPEC for a description of the parameters.   
-    This version provides a set of parameters which should give good,   
-    but not optimal, performance on many of the currently available   
-    computers.  Users are encouraged to modify this subroutine to set   
-    the tuning parameters for their particular machine using the option   
-    and problem size information in the arguments.   
-    Arguments   
-    =========   
-    ISPEC   (input) int
-            Specifies the parameter to be returned as the value of SP_IENV.   
-            = 1: the panel size w; a panel consists of w consecutive
-                columns of matrix A in the process of Gaussian elimination.
-                The best value depends on machine's cache characters.
-            = 2: the relaxation parameter relax; if the number of
-                nodes (columns) in a subtree of the elimination tree is less
-                than relax, this subtree is considered as one supernode,
-                regardless of their row structures.
-            = 3: the maximum size for a supernode;
-           = 4: the minimum row dimension for 2-D blocking to be used;
-           = 5: the minimum column dimension for 2-D blocking to be used;
-           = 6: the estimated fills factor for L and U, compared with A;
-   (SP_IENV) (output) int
-            >= 0: the value of the parameter specified by ISPEC   
-            < 0:  if SP_IENV = -k, the k-th argument had an illegal value. 
-    ===================================================================== 
-    int i;
-    switch (ispec) {
-       case 1: return (10);
-       case 2: return (5);
-       case 3: return (100);
-       case 4: return (200);
-       case 5: return (40);
-        case 6: return (20);
-    }
-    /* Invalid value for ISPEC */
-    i = 1;
-    xerbla_("sp_ienv", &i);
-    return 0;
-} /* sp_ienv_ */
diff --git a/superlu/sp_preorder.c b/superlu/sp_preorder.c
deleted file mode 100644
index cd1a5264..00000000
--- a/superlu/sp_preorder.c
+++ /dev/null
@@ -1,224 +0,0 @@
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_ddefs.h"
-sp_preorder(superlu_options_t *options,  SuperMatrix *A, int *perm_c, 
-           int *etree, SuperMatrix *AC)
- * Purpose
- * =======
- *
- * sp_preorder() permutes the columns of the original matrix. It performs
- * the following steps:
- *
- *    1. Apply column permutation perm_c[] to A's column pointers to form AC;
- *
- *    2. If options->Fact = DOFACT, then
- *       (1) Compute column elimination tree etree[] of AC'AC;
- *       (2) Post order etree[] to get a postordered elimination tree etree[],
- *           and a postorder permutation post[];
- *       (3) Apply post[] permutation to columns of AC;
- *       (4) Overwrite perm_c[] with the product perm_c * post.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         Specifies whether or not the elimination tree will be re-used.
- *         If options->Fact == DOFACT, this means first time factor A, 
- *         etree is computed, postered, and output.
- *         Otherwise, re-factor A, etree is input, unchanged on exit.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = NC or SLU_NCP; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *        Column permutation vector of size A->ncol, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *         If options->Fact == DOFACT, perm_c is both input and output.
- *         On output, it is changed according to a postorder of etree.
- *         Otherwise, perm_c is input.
- *
- * etree   (input/output) int*
- *         Elimination tree of Pc'*A'*A*Pc, dimension A->ncol.
- *         If options->Fact == DOFACT, etree is an output argument,
- *         otherwise it is an input argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * AC      (output) SuperMatrix*
- *         The resulting matrix after applied the column permutation
- *         perm_c[] to matrix A. The type of AC can be:
- *         Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
- *
- */
-    NCformat  *Astore;
-    NCPformat *ACstore;
-    int       *iwork, *post;
-    register  int n, i;
-    n = A->ncol;
-    /* Apply column permutation perm_c to A's column pointers so to
-       obtain NCP format in AC = A*Pc.  */
-    AC->Stype       = SLU_NCP;
-    AC->Dtype       = A->Dtype;
-    AC->Mtype       = A->Mtype;
-    AC->nrow        = A->nrow;
-    AC->ncol        = A->ncol;
-    Astore          = A->Store;
-    ACstore = AC->Store = (void *) SUPERLU_MALLOC( sizeof(NCPformat) );
-    if ( !ACstore ) ABORT("SUPERLU_MALLOC fails for ACstore");
-    ACstore->nnz    = Astore->nnz;
-    ACstore->nzval  = Astore->nzval;
-    ACstore->rowind = Astore->rowind;
-    ACstore->colbeg = (int*) SUPERLU_MALLOC(n*sizeof(int));
-    if ( !(ACstore->colbeg) ) ABORT("SUPERLU_MALLOC fails for 
-    ACstore->colend = (int*) SUPERLU_MALLOC(n*sizeof(int));
-    if ( !(ACstore->colend) ) ABORT("SUPERLU_MALLOC fails for 
-#ifdef DEBUG
-    print_int_vec("pre_order:", n, perm_c);
-    check_perm("Initial perm_c", n, perm_c);
-    for (i = 0; i < n; i++) {
-       ACstore->colbeg[perm_c[i]] = Astore->colptr[i]; 
-       ACstore->colend[perm_c[i]] = Astore->colptr[i+1];
-    }
-    if ( options->Fact == DOFACT ) {
-#undef ETREE_ATplusA
-#ifdef ETREE_ATplusA
-        /*--------------------------------------------
-         COMPUTE THE ETREE OF Pc*(A'+A)*Pc'.
-         --------------------------------------------*/
-        int *b_colptr, *b_rowind, bnz, j;
-       int *c_colbeg, *c_colend;
-        /*printf("Use etree(A'+A)\n");*/
-       /* Form B = A + A'. */
-       at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
-                 &bnz, &b_colptr, &b_rowind);
-       /* Form C = Pc*B*Pc'. */
-       c_colbeg = (int*) SUPERLU_MALLOC(2*n*sizeof(int));
-       c_colend = c_colbeg + n;
-       if (!c_colbeg ) ABORT("SUPERLU_MALLOC fails for c_colbeg/c_colend");
-       for (i = 0; i < n; i++) {
-           c_colbeg[perm_c[i]] = b_colptr[i]; 
-           c_colend[perm_c[i]] = b_colptr[i+1];
-       }
-       for (j = 0; j < n; ++j) {
-           for (i = c_colbeg[j]; i < c_colend[j]; ++i) {
-               b_rowind[i] = perm_c[b_rowind[i]];
-           }
-       }
-       /* Compute etree of C. */
-       sp_symetree(c_colbeg, c_colend, b_rowind, n, etree);
-       SUPERLU_FREE(b_colptr);
-       if ( bnz ) SUPERLU_FREE(b_rowind);
-       SUPERLU_FREE(c_colbeg);
-        /*--------------------------------------------
-         --------------------------------------------*/
-       sp_coletree(ACstore->colbeg, ACstore->colend, ACstore->rowind,
-                   A->nrow, A->ncol, etree);
-#ifdef DEBUG   
-       print_int_vec("etree:", n, etree);
-       /* In symmetric mode, do not do postorder here. */
-       if ( options->SymmetricMode == NO ) {
-           /* Post order etree */
-           post = (int *) TreePostorder(n, etree);
-           /* for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
-              iwork = post; */
-#ifdef DEBUG
-           print_int_vec("post:", n+1, post);
-           check_perm("post", n, post);        
-           iwork = (int*) SUPERLU_MALLOC((n+1)*sizeof(int)); 
-           if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
-           /* Renumber etree in postorder */
-           for (i = 0; i < n; ++i) iwork[post[i]] = post[etree[i]];
-           for (i = 0; i < n; ++i) etree[i] = iwork[i];
-#ifdef DEBUG   
-           print_int_vec("postorder etree:", n, etree);
-           /* Postmultiply A*Pc by post[] */
-           for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colbeg[i];
-           for (i = 0; i < n; ++i) ACstore->colbeg[i] = iwork[i];
-           for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colend[i];
-           for (i = 0; i < n; ++i) ACstore->colend[i] = iwork[i];
-           for (i = 0; i < n; ++i)
-               iwork[i] = post[perm_c[i]];  /* product of perm_c and post */
-           for (i = 0; i < n; ++i) perm_c[i] = iwork[i];
-#ifdef DEBUG
-           print_int_vec("Pc*post:", n, perm_c);
-           check_perm("final perm_c", n, perm_c);      
-           SUPERLU_FREE (post);
-           SUPERLU_FREE (iwork);
-       } /* end postordering */
-    } /* if options->Fact == DOFACT ... */
-int check_perm(char *what, int n, int *perm)
-    register int i;
-    int          *marker;
-    marker = (int *) calloc(n, sizeof(int));
-    for (i = 0; i < n; ++i) {
-       if ( marker[perm[i]] == 1 || perm[i] >= n ) {
-           printf("%s: Not a valid PERM[%d] = %d\n", what, i, perm[i]);
-           ABORT("check_perm");
-       } else {
-           marker[perm[i]] = 1;
-       }
-    }
-    SUPERLU_FREE(marker);
-    return 0;
diff --git a/superlu/spanel_bmod.c b/superlu/spanel_bmod.c
deleted file mode 100644
index 91bbb738..00000000
--- a/superlu/spanel_bmod.c
+++ /dev/null
@@ -1,462 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_sdefs.h"
-extern void strsv_();
-extern void sgemv_();
- * Function prototypes 
- */
-void slsolve(int, int, float *, float *);
-void smatvec(int, int, int, float *, float *, float *);
-extern void scheck_tempv();
-spanel_bmod (
-           const int  m,          /* in - number of rows in the matrix */
-           const int  w,          /* in */
-           const int  jcol,       /* in */
-           const int  nseg,       /* in */
-           float     *dense,     /* out, of size n by w */
-           float     *tempv,     /* working array */
-           int        *segrep,    /* in */
-           int        *repfnz,    /* in, of size n by w */
-           GlobalLU_t *Glu,       /* modified */
-           SuperLUStat_t *stat    /* output */
-           )
- * Purpose
- * =======
- *
- *    Performs numeric block updates (sup-panel) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- *    Before entering this routine, the original nonzeros in the panel 
- *    were already copied into the spa[m,w].
- *
- *    Updated/Output parameters-
- *     dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
- *      collectively in the m-by-w vector dense[*]. 
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-    int          incx = 1, incy = 1;
-    float       alpha, beta;
-    register int k, ksub;
-    int          fsupc, nsupc, nsupr, nrow;
-    int          krep, krep_ind;
-    float       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          segsze;
-    int          block_nrow;  /* no of rows in a block row */
-    register int lptr;       /* Points to the row subscripts of a supernode */
-    int          kfnz, irow, no_zeros; 
-    register int isub, isub1, i;
-    register int jj;         /* Index through each column in the panel */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    float       *lusup;
-    int          *xlusup;
-    int          *repfnz_col; /* repfnz[] for a column in the panel */
-    float       *dense_col;  /* dense[] for a column in the panel */
-    float       *tempv1;             /* Used in 1-D update */
-    float       *TriTmp, *MatvecTmp; /* used in 2-D update */
-    float      zero = 0.0;
-    float      one = 1.0;
-    register int ldaTmp;
-    register int r_ind, r_hi;
-    static   int first = 1, maxsuper, rowblk, colblk;
-    flops_t  *ops = stat->ops;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       rowblk   = sp_ienv(4);
-       colblk   = sp_ienv(5);
-       first = 0;
-    }
-    ldaTmp = maxsuper + rowblk;
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
-       /* krep = representative of current k-th supernode
-        * fsupc = first supernodal column
-        * nsupc = no of columns in a supernode
-        * nsupr = no of rows in a supernode
-        */
-        krep = segrep[k--];
-       fsupc = xsup[supno[krep]];
-       nsupc = krep - fsupc + 1;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nrow = nsupr - nsupc;
-       lptr = xlsub[fsupc];
-       krep_ind = lptr + nsupc - 1;
-       repfnz_col = repfnz;
-       dense_col = dense;
-       if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
-           TriTmp = tempv;
-           /* Sequence through each column in panel -- triangular solves */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-               ops[TRSV] += segsze * (segsze - 1);
-               ops[GEMV] += 2 * nrow * segsze;
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       dense_col[irow] -= ukj * lusup[luptr];
-                       ++luptr;
-                   }
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   luptr1 = luptr - nsupr;
-                   if ( segsze == 2 ) {
-                       ukj -= ukj1 * lusup[luptr1];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++;
-                           dense_col[irow] -= (ukj*lusup[luptr]
-                                               + ukj1*lusup[luptr1]);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       ukj1 -= ukj2 * lusup[luptr2-1];
-                       ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++; luptr2++;
-                           dense_col[irow] -= ( ukj*lusup[luptr]
-                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                       }
-                   }
-               } else  {       /* segsze >= 4 */
-                   /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
-                      holds the result of triangular solves.    */
-                   no_zeros = kfnz - fsupc;
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       TriTmp[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-#ifdef _CRAY
-                   STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-                   strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-                   slsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
-               } /* else ... */
-           }  /* for jj ... end tri-solves */
-           /* Block row updates; push all the way into dense[*] block */
-           for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
-               r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
-               block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
-               luptr = xlusup[fsupc] + nsupc + r_ind;
-               isub1 = lptr + nsupc + r_ind;
-               repfnz_col = repfnz;
-               TriTmp = tempv;
-               dense_col = dense;
-               /* Sequence through each column in panel -- matrix-vector */
-               for (jj = jcol; jj < jcol + w; jj++,
-                    repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-                   kfnz = repfnz_col[krep];
-                   if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-                   segsze = krep - kfnz + 1;
-                   if ( segsze <= 3 ) continue;   /* skip unrolled cases */
-                   /* Perform a block update, and scatter the result of
-                      matrix-vector to dense[].                 */
-                   no_zeros = kfnz - fsupc;
-                   luptr1 = luptr + nsupr * no_zeros;
-                   MatvecTmp = &TriTmp[maxsuper];
-                   alpha = one; 
-                    beta = zero;
-#ifdef _CRAY
-                   SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-                   sgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-                   smatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
-                          TriTmp, MatvecTmp);
-                   /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
-                    * such that MatvecTmp[*] can be re-used for the
-                    * the next blok row update. dense[] will be copied into 
-                    * global store after the whole panel has been finished.
-                    */
-                   isub = isub1;
-                   for (i = 0; i < block_nrow; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] -= MatvecTmp[i];
-                       MatvecTmp[i] = zero;
-                       ++isub;
-                   }
-               } /* for jj ... */
-           } /* for each block row ... */
-           /* Scatter the triangular solves into SPA dense[*] */
-           repfnz_col = repfnz;
-           TriTmp = tempv;
-           dense_col = dense;
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               if ( segsze <= 3 ) continue; /* skip unrolled cases */
-               no_zeros = kfnz - fsupc;                
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   dense_col[irow] = TriTmp[i];
-                   TriTmp[i] = zero;
-                   ++isub;
-               }
-           } /* for jj ... */
-       } else { /* 1-D block modification */
-           /* Sequence through each column in the panel */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-               ops[TRSV] += segsze * (segsze - 1);
-               ops[GEMV] += 2 * nrow * segsze;
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       dense_col[irow] -= ukj * lusup[luptr];
-                       ++luptr;
-                   }
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr1 = luptr - nsupr;
-                   if ( segsze == 2 ) {
-                       ukj -= ukj1 * lusup[luptr1];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr;  ++luptr1;
-                           dense_col[irow] -= (ukj*lusup[luptr]
-                                               + ukj1*lusup[luptr1]);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       ukj1 -= ukj2 * lusup[luptr2-1];
-                       ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr; ++luptr1; ++luptr2;
-                           dense_col[irow] -= ( ukj*lusup[luptr]
-                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                       }
-                   }
-               } else  { /* segsze >= 4 */
-                   /* 
-                    * Perform a triangular solve and block update,
-                    * then scatter the result of sup-col update to dense[].
-                    */
-                   no_zeros = kfnz - fsupc;
-                   /* Copy U[*,j] segment from dense[*] to tempv[*]: 
-                    *    The result of triangular solve is in tempv[*];
-                    *    The result of matrix vector update is in dense_col[*]
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       tempv[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-#ifdef _CRAY
-                   STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-                   strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-                   luptr += segsze;    /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                    alpha = one;
-                    beta = zero;
-#ifdef _CRAY
-                   SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-                   sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-                   slsolve ( nsupr, segsze, &lusup[luptr], tempv );
-                   luptr += segsze;        /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                   smatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
-                   /* Scatter tempv[*] into SPA dense[*] temporarily, such
-                    * that tempv[*] can be used for the triangular solve of
-                    * the next column of the panel. They will be copied into 
-                    * ucol[*] after the whole panel has been finished.
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] = tempv[i];
-                       tempv[i] = zero;
-                       isub++;
-                   }
-                   /* Scatter the update from tempv1[*] into SPA dense[*] */
-                   /* Start dense rectangular L */
-                   for (i = 0; i < nrow; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] -= tempv1[i];
-                       tempv1[i] = zero;
-                       ++isub; 
-                   }
-               } /* else segsze>=4 ... */
-           } /* for each column in the panel... */
-       } /* else 1-D update ... */
-    } /* for each updating supernode ... */
diff --git a/superlu/spanel_dfs.c b/superlu/spanel_dfs.c
deleted file mode 100644
index d32af402..00000000
--- a/superlu/spanel_dfs.c
+++ /dev/null
@@ -1,256 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_sdefs.h"
-spanel_dfs (
-          const int  m,           /* in - number of rows in the matrix */
-          const int  w,           /* in */
-          const int  jcol,        /* in */
-          SuperMatrix *A,       /* in - original matrix */
-          int        *perm_r,     /* in */
-          int        *nseg,       /* out */
-          float     *dense,      /* out */
-          int        *panel_lsub, /* out */
-          int        *segrep,     /* out */
-          int        *repfnz,     /* out */
-          int        *xprune,     /* out */
-          int        *marker,     /* out */     
-          int        *parent,     /* working array */
-          int        *xplore,     /* working array */
-          GlobalLU_t *Glu         /* modified */
-          )
- * Purpose
- * =======
- *
- *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives.
- *
- *   The routine returns one list of the supernodal representatives
- *   in topological order of the dfs that generates them. This list is
- *   a superset of the topological order of each individual column within
- *   the panel. 
- *   The location of the first nonzero in each supernodal segment
- *   (supernodal entry location) is also returned. Each column has a 
- *   separate list for this purpose.
- *
- *   Two marker arrays are used for dfs:
- *     marker[i] == jj, if i was visited during dfs of current column jj;
- *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
- *
- *   marker: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- */
-    NCPformat *Astore;
-    float    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int              krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
-    int       k, krow, kmark, kperm;
-    int       xdfs, maxdfs, kpar;
-    int       jj;         /* index through each column in the panel */
-    int       *marker1;           /* marker1[jj] >= jcol if vertex jj was 
-                             by a previous column within this panel.   */
-    int       *repfnz_col; /* start of each column in the panel */
-    float    *dense_col;  /* start of each column in the panel */
-    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    /* Initialize pointers */
-    Astore     = A->Store;
-    a          = Astore->nzval;
-    asub       = Astore->rowind;
-    xa_begin   = Astore->colbeg;
-    xa_end     = Astore->colend;
-    marker1    = marker + m;
-    repfnz_col = repfnz;
-    dense_col  = dense;
-    *nseg      = 0;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    /* For each column in the panel */
-    for (jj = jcol; jj < jcol + w; jj++) {
-       nextl_col = (jj - jcol) * m;
-#ifdef CHK_DFS
-       printf("\npanel col %d: ", jj);
-       /* For each nonz in A[*,jj] do dfs */
-       for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
-           krow = asub[k];
-            dense_col[krow] = a[k];
-           kmark = marker[krow];       
-           if ( kmark == jj ) 
-               continue;     /* krow visited before, go to the next nonzero */
-           /* For each unmarked nbr krow of jj
-            * krow is in L: place it in structure of L[*,jj]
-            */
-           marker[krow] = jj;
-           kperm = perm_r[krow];
-           if ( kperm == EMPTY ) {
-               panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
-           }
-           /* 
-            * krow is in U: if its supernode-rep krep
-            * has been explored, update repfnz[*]
-            */
-           else {
-               krep = xsup[supno[kperm]+1] - 1;
-               myfnz = repfnz_col[krep];
-#ifdef CHK_DFS
-               printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, 
-               if ( myfnz != EMPTY ) { /* Representative visited before */
-                   if ( myfnz > kperm ) repfnz_col[krep] = kperm;
-                   /* continue; */
-               }
-               else {
-                   /* Otherwise, perform dfs starting at krep */
-                   oldrep = EMPTY;
-                   parent[krep] = oldrep;
-                   repfnz_col[krep] = kperm;
-                   xdfs = xlsub[krep];
-                   maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                   printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
-                   for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                   printf("\n");
-                   do {
-                       /* 
-                        * For each unmarked kchild of krep 
-                        */
-                       while ( xdfs < maxdfs ) {
-                           kchild = lsub[xdfs];
-                           xdfs++;
-                           chmark = marker[kchild];
-                           if ( chmark != jj ) { /* Not reached yet */
-                               marker[kchild] = jj;
-                               chperm = perm_r[kchild];
-                               /* Case kchild is in L: place it in L[*,j] */
-                               if ( chperm == EMPTY ) {
-                                   panel_lsub[nextl_col++] = kchild;
-                               } 
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               else {
-                                   chrep = xsup[supno[chperm]+1] - 1;
-                                   myfnz = repfnz_col[chrep];
-#ifdef CHK_DFS
-                                   printf("chrep %d,myfnz %d,perm_r[%d] 
-                                   if ( myfnz != EMPTY ) { /* Visited before */
-                                       if ( myfnz > chperm )
-                                           repfnz_col[chrep] = chperm;
-                                   }
-                                   else {
-                                       /* Cont. dfs at snode-rep of kchild */
-                                       xplore[krep] = xdfs;    
-                                       oldrep = krep;
-                                       krep = chrep; /* Go deeper down G(L) */
-                                       parent[krep] = oldrep;
-                                       repfnz_col[krep] = chperm;
-                                       xdfs = xlsub[krep];     
-                                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                                       printf("  xdfs %d, maxdfs %d: ", xdfs, 
-                                       for (i = xdfs; i < maxdfs; i++) 
printf(" %d", lsub[i]); 
-                                       printf("\n");
-                                   } /* else */
-                               } /* else */
-                           } /* if... */
-                       } /* while xdfs < maxdfs */
-                       /* krow has no more unexplored nbrs:
-                        *    Place snode-rep krep in postorder DFS, if this 
-                        *    segment is seen for the first time. (Note that
-                        *    "repfnz[krep]" may change later.)
-                        *    Backtrack dfs to its parent.
-                        */
-                       if ( marker1[krep] < jcol ) {
-                           segrep[*nseg] = krep;
-                           ++(*nseg);
-                           marker1[krep] = jj;
-                       }
-                       kpar = parent[krep]; /* Pop stack, mimic recursion */
-                       if ( kpar == EMPTY ) break; /* dfs done */
-                       krep = kpar;
-                       xdfs = xplore[krep];
-                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                       printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", 
-                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                       printf("\n");
-                   } while ( kpar != EMPTY ); /* do-while - until empty stack 
-               } /* else */
-           } /* else */
-       } /* for each nonz in A[*,jj] */
-       repfnz_col += m;    /* Move to next column */
-        dense_col += m;
-    } /* for jj ... */
diff --git a/superlu/spivotL.c b/superlu/spivotL.c
deleted file mode 100644
index ee66dbce..00000000
--- a/superlu/spivotL.c
+++ /dev/null
@@ -1,182 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <math.h>
-#include <stdlib.h>
-#include "slu_sdefs.h"
-#undef DEBUG
-        const int  jcol,     /* in */
-        const float u,      /* in - diagonal pivoting threshold */
-        int        *usepr,   /* re-use the pivot sequence given by 
perm_r/iperm_r */
-        int        *perm_r,  /* may be modified */
-        int        *iperm_r, /* in - inverse of perm_r */
-        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
-        int        *pivrow,  /* out */
-        GlobalLU_t *Glu,     /* modified - global LU data structures */
-       SuperLUStat_t *stat  /* output */
-       )
- * Purpose
- * =======
- *   Performs the numerical pivoting on the current column of L,
- *   and the CDIV operation.
- *
- *   Pivot policy:
- *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
- *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
- *           pivot row = k;
- *       ELSE IF abs(A_jj) >= thresh THEN
- *           pivot row = j;
- *       ELSE
- *           pivot row = m;
- * 
- *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
- *
- *   Return value: 0      success;
- *                 i > 0  U(i,i) is exactly zero.
- *
- */
-    int          fsupc;            /* first column in the supernode */
-    int          nsupc;            /* no of columns in the supernode */
-    int          nsupr;     /* no of rows in the supernode */
-    int          lptr;     /* points to the starting subscript of the 
supernode */
-    int          pivptr, old_pivptr, diag, diagind;
-    float       pivmax, rtemp, thresh;
-    float       temp;
-    float       *lu_sup_ptr; 
-    float       *lu_col_ptr;
-    int          *lsub_ptr;
-    int          isub, icol, k, itemp;
-    int          *lsub, *xlsub;
-    float       *lusup;
-    int          *xlusup;
-    flops_t      *ops = stat->ops;
-    /* Initialize pointers */
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
-    nsupc      = jcol - fsupc;         /* excluding jcol; nsupc >= 0 */
-    lptr       = xlsub[fsupc];
-    nsupr      = xlsub[fsupc+1] - lptr;
-    lu_sup_ptr = &lusup[xlusup[fsupc]];        /* start of the current 
supernode */
-    lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
-    lsub_ptr   = &lsub[lptr];  /* start of row indices of the supernode */
-#ifdef DEBUG
-if ( jcol == MIN_COL ) {
-    printf("Before cdiv: col %d\n", jcol);
-    for (k = nsupc; k < nsupr; k++) 
-       printf("  lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
-    /* Determine the largest abs numerical value for partial pivoting;
-       Also search for user-specified pivot, and diagonal element. */
-    if ( *usepr ) *pivrow = iperm_r[jcol];
-    diagind = iperm_c[jcol];
-    pivmax = 0.0;
-    pivptr = nsupc;
-    diag = EMPTY;
-    old_pivptr = nsupc;
-    for (isub = nsupc; isub < nsupr; ++isub) {
-       rtemp = fabs (lu_col_ptr[isub]);
-       if ( rtemp > pivmax ) {
-           pivmax = rtemp;
-           pivptr = isub;
-       }
-       if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
-       if ( lsub_ptr[isub] == diagind ) diag = isub;
-    }
-    /* Test for singularity */
-    if ( pivmax == 0.0 ) {
-       *pivrow = lsub_ptr[pivptr];
-       perm_r[*pivrow] = jcol;
-       *usepr = 0;
-       return (jcol+1);
-    }
-    thresh = u * pivmax;
-    /* Choose appropriate pivotal element by our policy. */
-    if ( *usepr ) {
-        rtemp = fabs (lu_col_ptr[old_pivptr]);
-       if ( rtemp != 0.0 && rtemp >= thresh )
-           pivptr = old_pivptr;
-       else
-           *usepr = 0;
-    }
-    if ( *usepr == 0 ) {
-       /* Use diagonal pivot? */
-       if ( diag >= 0 ) { /* diagonal exists */
-           rtemp = fabs (lu_col_ptr[diag]);
-           if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
-        }
-       *pivrow = lsub_ptr[pivptr];
-    }
-    /* Record pivot row */
-    perm_r[*pivrow] = jcol;
-    /* Interchange row subscripts */
-    if ( pivptr != nsupc ) {
-       itemp = lsub_ptr[pivptr];
-       lsub_ptr[pivptr] = lsub_ptr[nsupc];
-       lsub_ptr[nsupc] = itemp;
-       /* Interchange numerical values as well, for the whole snode, such 
-        * that L is indexed the same way as A.
-        */
-       for (icol = 0; icol <= nsupc; icol++) {
-           itemp = pivptr + icol * nsupr;
-           temp = lu_sup_ptr[itemp];
-           lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
-           lu_sup_ptr[nsupc + icol*nsupr] = temp;
-       }
-    } /* if */
-    /* cdiv operation */
-    ops[FACT] += nsupr - nsupc;
-    temp = 1.0 / lu_col_ptr[nsupc];
-    for (k = nsupc+1; k < nsupr; k++) 
-       lu_col_ptr[k] *= temp;
-    return 0;
diff --git a/superlu/spivotgrowth.c b/superlu/spivotgrowth.c
deleted file mode 100644
index 05a24463..00000000
--- a/superlu/spivotgrowth.c
+++ /dev/null
@@ -1,129 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <math.h>
-#include "slu_sdefs.h"
-sPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
-             SuperMatrix *L, SuperMatrix *U)
- * Purpose
- * =======
- *
- * Compute the reciprocal pivot growth factor of the leading ncols columns
- * of the matrix, using the formula:
- *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
- *
- * Arguments
- * =========
- *
- * ncols    (input) int
- *          The number of columns of matrices A, L and U.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = NC; Dtype = SLU_S; Mtype = GE.
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SC; Dtype = SLU_S; Mtype = TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = NC;
- *          Dtype = SLU_S; Mtype = TRU.
- *
- */
-    NCformat *Astore;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    float  *Aval, *Lval, *Uval;
-    int      fsupc, nsupr, luptr, nz_in_U;
-    int      i, j, k, oldcol;
-    int      *inv_perm_c;
-    float   rpg, maxaj, maxuj;
-    extern   double slamch_(char *);
-    float   smlnum;
-    float   *luval;
-    /* Get machine constants. */
-    smlnum = slamch_("S");
-    rpg = 1. / smlnum;
-    Astore = A->Store;
-    Lstore = L->Store;
-    Ustore = U->Store;
-    Aval = Astore->nzval;
-    Lval = Lstore->nzval;
-    Uval = Ustore->nzval;
-    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
-    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;
-    for (k = 0; k <= Lstore->nsuper; ++k) {
-       fsupc = L_FST_SUPC(k);
-       nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-       luptr = L_NZ_START(fsupc);
-       luval = &Lval[luptr];
-       nz_in_U = 1;
-       for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
-           maxaj = 0.;
-            oldcol = inv_perm_c[j];
-           for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
-               maxaj = SUPERLU_MAX( maxaj, fabs(Aval[i]) );
-           maxuj = 0.;
-           for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
-               maxuj = SUPERLU_MAX( maxuj, fabs(Uval[i]) );
-           /* Supernode */
-           for (i = 0; i < nz_in_U; ++i)
-               maxuj = SUPERLU_MAX( maxuj, fabs(luval[i]) );
-           ++nz_in_U;
-           luval += nsupr;
-           if ( maxuj == 0. )
-               rpg = SUPERLU_MIN( rpg, 1.);
-           else
-               rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
-       }
-       if ( j >= ncols ) break;
-    }
-    SUPERLU_FREE(inv_perm_c);
-    return (rpg);
diff --git a/superlu/spruneL.c b/superlu/spruneL.c
deleted file mode 100644
index 0d5755fb..00000000
--- a/superlu/spruneL.c
+++ /dev/null
@@ -1,156 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_sdefs.h"
-       const int  jcol,             /* in */
-       const int  *perm_r,   /* in */
-       const int  pivrow,    /* in */
-       const int  nseg,             /* in */
-       const int  *segrep,   /* in */
-       const int  *repfnz,   /* in */
-       int        *xprune,   /* out */
-       GlobalLU_t *Glu       /* modified - global LU data structures */
-       )
- * Purpose
- * =======
- *   Prunes the L-structure of supernodes whose L-structure
- *   contains the current pivot row "pivrow"
- *
- */
-    float     utemp;
-    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
-    int        i, ktemp, minloc, maxloc;
-    int        do_prune; /* logical variable */
-    int        *xsup, *supno;
-    int        *lsub, *xlsub;
-    float     *lusup;
-    int        *xlusup;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    /*
-     * For each supernode-rep irep in U[*,j]
-     */
-    jsupno = supno[jcol];
-    for (i = 0; i < nseg; i++) {
-       irep = segrep[i];
-       irep1 = irep + 1;
-       do_prune = FALSE;
-       /* Don't prune with a zero U-segment */
-       if ( repfnz[irep] == EMPTY )
-               continue;
-       /* If a snode overlaps with the next panel, then the U-segment 
-        * is fragmented into two parts -- irep and irep1. We should let
-        * pruning occur at the rep-column in irep1's snode. 
-        */
-       if ( supno[irep] == supno[irep1] )      /* Don't prune */
-               continue;
-       /*
-        * If it has not been pruned & it has a nonz in row L[pivrow,i]
-        */
-       if ( supno[irep] != jsupno ) {
-           if ( xprune[irep] >= xlsub[irep1] ) {
-               kmin = xlsub[irep];
-               kmax = xlsub[irep1] - 1;
-               for (krow = kmin; krow <= kmax; krow++) 
-                   if ( lsub[krow] == pivrow ) {
-                       do_prune = TRUE;
-                       break;
-                   }
-           }
-           if ( do_prune ) {
-               /* Do a quicksort-type partition
-                * movnum=TRUE means that the num values have to be exchanged.
-                */
-               movnum = FALSE;
-               if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
-                       movnum = TRUE;
-               while ( kmin <= kmax ) {
-                   if ( perm_r[lsub[kmax]] == EMPTY ) 
-                       kmax--;
-                   else if ( perm_r[lsub[kmin]] != EMPTY )
-                       kmin++;
-                   else { /* kmin below pivrow, and kmax above pivrow: 
-                           *   interchange the two subscripts
-                           */
-                       ktemp = lsub[kmin];
-                       lsub[kmin] = lsub[kmax];
-                       lsub[kmax] = ktemp;
-                       /* If the supernode has only one column, then we
-                        * only keep one set of subscripts. For any subscript 
-                        * interchange performed, similar interchange must be 
-                        * done on the numerical values.
-                        */
-                       if ( movnum ) {
-                           minloc = xlusup[irep] + (kmin - xlsub[irep]);
-                           maxloc = xlusup[irep] + (kmax - xlsub[irep]);
-                           utemp = lusup[minloc];
-                           lusup[minloc] = lusup[maxloc];
-                           lusup[maxloc] = utemp;
-                       }
-                       kmin++;
-                       kmax--;
-                   }
-               } /* while */
-               xprune[irep] = kmin;    /* Pruning */
-#ifdef CHK_PRUNE
-       printf("    After spruneL(),using col %d:  xprune[%d] = %d\n", 
-                       jcol, irep, kmin);
-           } /* if do_prune */
-       } /* if */
-    } /* for each U-segment... */
diff --git a/superlu/sreadhb.c b/superlu/sreadhb.c
deleted file mode 100644
index 938f30a3..00000000
--- a/superlu/sreadhb.c
+++ /dev/null
@@ -1,276 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_sdefs.h"
-/* Eat up the rest of the current line */
-int sDumpLine(FILE *fp)
-    register int c;
-    while ((c = fgetc(fp)) != '\n') ;
-    return 0;
-int sParseIntFormat(char *buf, int *num, int *size)
-    char *tmp;
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    sscanf(tmp, "%d", num);
-    while (*tmp != 'I' && *tmp != 'i') ++tmp;
-    ++tmp;
-    sscanf(tmp, "%d", size);
-    return 0;
-int sParseFloatFormat(char *buf, int *num, int *size)
-    char *tmp, *period;
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-    while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd'
-          && *tmp != 'F' && *tmp != 'f') {
-        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
-           num picked up refers to P, which should be skipped. */
-        if (*tmp=='p' || *tmp=='P') {
-           ++tmp;
-           *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-        } else {
-           ++tmp;
-        }
-    }
-    ++tmp;
-    period = tmp;
-    while (*period != '.' && *period != ')') ++period ;
-    *period = '\0';
-    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/
-    return 0;
-int sReadVector(FILE *fp, int n, int *where, int perline, int persize)
-    register int i, j, item;
-    char tmp, buf[100], *dummy;
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           item = atoi(&buf[j*persize]); 
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-           where[i++] = item - 1;
-       }
-    }
-    return 0;
-int sReadValues(FILE *fp, int n, float *destination, int perline, int persize)
-    register int i, j, k, s;
-    char tmp, buf[100], *dummy;
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           s = j*persize;
-           for (k = 0; k < persize; ++k) /* No D_ format in C */
-               if ( buf[s+k] == 'D' || buf[s+k] == 'd' ) buf[s+k] = 'E';
-           destination[i++] = atof(&buf[s]);
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-       }
-    }
-    return 0;
-sreadhb(int *nrow, int *ncol, int *nonz,
-       float **nzval, int **rowind, int **colptr)
- * Purpose
- * =======
- * 
- * Read a FLOAT PRECISION matrix stored in Harwell-Boeing format 
- * as described below.
- * 
- * Line 1 (A72,A8) 
- *     Col. 1 - 72   Title (TITLE) 
- *     Col. 73 - 80  Key (KEY) 
- * 
- * Line 2 (5I14) 
- *     Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
- *     Col. 15 - 28  Number of lines for pointers (PTRCRD) 
- *     Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
- *     Col. 43 - 56  Number of lines for numerical values (VALCRD) 
- *     Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
- *                    (including starting guesses and solution vectors 
- *                    if present) 
- *                   (zero indicates no right-hand side data is present) 
- *
- * Line 3 (A3, 11X, 4I14) 
- *     Col. 1 - 3    Matrix type (see below) (MXTYPE) 
- *     Col. 15 - 28  Number of rows (or variables) (NROW) 
- *     Col. 29 - 42  Number of columns (or elements) (NCOL) 
- *     Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
- *                   (equal to number of entries for assembled matrices) 
- *     Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
- *                   (zero in the case of assembled matrices) 
- * Line 4 (2A16, 2A20) 
- *     Col. 1 - 16   Format for pointers (PTRFMT) 
- *     Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
- *     Col. 33 - 52  Format for numerical values of coefficient matrix 
- *     Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
- *
- * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
- *     Col. 1        Right-hand side type: 
- *                       F for full storage or M for same format as matrix 
- *     Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
- *     Col. 3        X if an exact solution vector(s) is supplied. 
- *     Col. 15 - 28  Number of right-hand sides (NRHS) 
- *     Col. 29 - 42  Number of row indices (NRHSIX) 
- *                   (ignored in case of unassembled matrices) 
- *
- * The three character type field on line 3 describes the matrix type. 
- * The following table lists the permitted values for each of the three 
- * characters. As an example of the type field, RSA denotes that the matrix 
- * is real, symmetric, and assembled. 
- *
- * First Character: 
- *     R Real matrix 
- *     C Complex matrix 
- *     P Pattern only (no numerical values supplied) 
- *
- * Second Character: 
- *     S Symmetric 
- *     U Unsymmetric 
- *     H Hermitian 
- *     Z Skew symmetric 
- *     R Rectangular 
- *
- * Third Character: 
- *     A Assembled 
- *     E Elemental matrices (unassembled) 
- *
- */
-  register int i, numer_lines = 0, rhscrd = 0, dummy;
-    int tmp, colnum, colsize, rownum, rowsize, valnum, valsize;
-    char buf[100], type[4], key[10], *dummyc;
-    FILE *fp;
-    fp = stdin;
-    /* Line 1 */
-    dummyc = fgets(buf, 100, fp);
-    fputs(buf, stdout);
-#if 0
-    dummy = fscanf(fp, "%72c", buf); buf[72] = 0;
-    printf("Title: %s", buf);
-    dummy += fscanf(fp, "%8c", key);  key[8] = 0;
-    printf("Key: %s\n", key);
-    sDumpLine(fp);
-    /* Line 2 */
-    for (i=0; i<5; i++) {
-       dummy += fscanf(fp, "%14c", buf); buf[14] = 0;
-       sscanf(buf, "%d", &tmp);
-       if (i == 3) numer_lines = tmp;
-       if (i == 4 && tmp) rhscrd = tmp;
-    }
-    sDumpLine(fp);
-    /* Line 3 */
-    dummy += fscanf(fp, "%3c", type);
-    dummy += fscanf(fp, "%11c", buf); /* pad */
-    type[3] = 0;
-#ifdef DEBUG
-    printf("Matrix type %s\n", type);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nrow);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", ncol);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nonz);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", &tmp);
-    if (tmp != 0)
-         printf("This is not an assembled matrix!\n");
-    if (*nrow != *ncol)
-       printf("Matrix is not square.\n");
-    sDumpLine(fp);
-    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
-    sallocateA(*ncol, *nonz, nzval, rowind, colptr);
-    /* Line 4: format statement */
-    dummy += fscanf(fp, "%16c", buf);
-    sParseIntFormat(buf, &colnum, &colsize);
-    dummy += fscanf(fp, "%16c", buf);
-    sParseIntFormat(buf, &rownum, &rowsize);
-    dummy += fscanf(fp, "%20c", buf);
-    sParseFloatFormat(buf, &valnum, &valsize);
-    dummy += fscanf(fp, "%20c", buf);
-    sDumpLine(fp);
-    /* Line 5: right-hand side */    
-    if ( rhscrd ) sDumpLine(fp); /* skip RHSFMT */
-#ifdef DEBUG
-    printf("%d rows, %d nonzeros\n", *nrow, *nonz);
-    printf("colnum %d, colsize %d\n", colnum, colsize);
-    printf("rownum %d, rowsize %d\n", rownum, rowsize);
-    printf("valnum %d, valsize %d\n", valnum, valsize);
-    sReadVector(fp, *ncol+1, *colptr, colnum, colsize);
-    sReadVector(fp, *nonz, *rowind, rownum, rowsize);
-    if ( numer_lines ) {
-        sReadValues(fp, *nonz, *nzval, valnum, valsize);
-    }
-    fclose(fp);
diff --git a/superlu/ssnode_bmod.c b/superlu/ssnode_bmod.c
deleted file mode 100644
index a476dd5f..00000000
--- a/superlu/ssnode_bmod.c
+++ /dev/null
@@ -1,115 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_sdefs.h"
-extern void strsv_();
-extern void sgemv_();
- * Performs numeric block updates within the relaxed snode. 
- */
-ssnode_bmod (
-           const int  jcol,      /* in */
-           const int  jsupno,    /* in */
-           const int  fsupc,     /* in */
-           float     *dense,    /* in */
-           float     *tempv,    /* working array */
-           GlobalLU_t *Glu,      /* modified */
-           SuperLUStat_t *stat   /* output */
-           )
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-    int            incx = 1, incy = 1;
-    float         alpha = -1.0, beta = 1.0;
-    int            luptr, nsupc, nsupr, nrow;
-    int            isub, irow, i, iptr; 
-    register int   ufirst, nextlu;
-    int            *lsub, *xlsub;
-    float         *lusup;
-    int            *xlusup;
-    flops_t *ops = stat->ops;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nextlu = xlusup[jcol];
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-       dense[irow] = 0;
-       ++nextlu;
-    }
-    xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
-    if ( fsupc < jcol ) {
-       luptr = xlusup[fsupc];
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nsupc = jcol - fsupc;   /* Excluding jcol */
-       ufirst = xlusup[jcol];  /* Points to the beginning of column
-                                  jcol in supernode L\U(jsupno). */
-       nrow = nsupr - nsupc;
-       ops[TRSV] += nsupc * (nsupc - 1);
-       ops[GEMV] += 2 * nrow * nsupc;
-#ifdef _CRAY
-       STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
-                       &lusup[ufirst], &tempv[0] );
-        /* Scatter tempv[*] into lusup[*] */
-       iptr = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           lusup[iptr++] -= tempv[i];
-           tempv[i] = 0.0;
-       }
-    }
-    return 0;
diff --git a/superlu/ssnode_dfs.c b/superlu/ssnode_dfs.c
deleted file mode 100644
index b1536c25..00000000
--- a/superlu/ssnode_dfs.c
+++ /dev/null
@@ -1,113 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_sdefs.h"
-ssnode_dfs (
-          const int  jcol,         /* in - start of the supernode */
-          const int  kcol,         /* in - end of the supernode */
-          const int  *asub,        /* in */
-          const int  *xa_begin,    /* in */
-          const int  *xa_end,      /* in */
-          int        *xprune,      /* out */
-          int        *marker,      /* modified */
-          GlobalLU_t *Glu          /* modified */
-          )
-/* Purpose
- * =======
- *    ssnode_dfs() - Determine the union of the row structures of those 
- *    columns within the relaxed snode.
- *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
- *    the portion outside the rectangular supernode must be zero.
- *
- * Return value
- * ============
- *     0   success;
- *    >0   number of bytes allocated when run out of memory.
- *
- */
-    register int i, k, ifrom, ito, nextl, new_next;
-    int          nsuper, krow, kmark, mem_error;
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    int          nzlmax;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-    nsuper = ++supno[jcol];    /* Next available supernode number */
-    nextl = xlsub[jcol];
-    for (i = jcol; i <= kcol; i++) {
-       /* For each nonzero in A[*,i] */
-       for (k = xa_begin[i]; k < xa_end[i]; k++) {     
-           krow = asub[k];
-           kmark = marker[krow];
-           if ( kmark != kcol ) { /* First time visit krow */
-               marker[krow] = kcol;
-               lsub[nextl++] = krow;
-               if ( nextl >= nzlmax ) {
-                   if ( mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, 
Glu) )
-                       return (mem_error);
-                   lsub = Glu->lsub;
-               }
-           }
-       }
-       supno[i] = nsuper;
-    }
-    /* Supernode > 1, then make a copy of the subscripts for pruning */
-    if ( jcol < kcol ) {
-       new_next = nextl + (nextl - xlsub[jcol]);
-       while ( new_next > nzlmax ) {
-           if ( mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-               return (mem_error);
-           lsub = Glu->lsub;
-       }
-       ito = nextl;
-       for (ifrom = xlsub[jcol]; ifrom < nextl; )
-           lsub[ito++] = lsub[ifrom++];        
-        for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
-       nextl = ito;
-    }
-    xsup[nsuper+1] = kcol + 1;
-    supno[kcol+1]  = nsuper;
-    xprune[kcol]   = nextl;
-    xlsub[kcol+1]  = nextl;
-    return 0;
diff --git a/superlu/ssp_blas2.c b/superlu/ssp_blas2.c
deleted file mode 100644
index 0e14c00b..00000000
--- a/superlu/ssp_blas2.c
+++ /dev/null
@@ -1,481 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:          ssp_blas2.c
- * Purpose:            Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-#include "slu_sdefs.h"
-extern void strsv_();
-extern void sgemv_();
- * Function prototypes 
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
- *   Purpose
- *   =======
- *
- *   sp_strsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.
- *                trans = 'C' or 'c'   A'*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *          
- *   L       - (input) SuperMatrix*
- *            The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *             The factor U from the factorization Pr*A*Pc=L*U.
- *             U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
- *    
- *   x       - (input/output) float*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-    SCformat *Lstore;
-    NCformat *Ustore;
-    float   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    float alpha = 1.0, beta = 1.0;
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    float *work;
-    flops_t solve_ops;
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
-              !lsame_(trans, "C")) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sp_strsv", &i);
-       return 0;
-    }
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    if ( !(work = floatCalloc(L->nrow)) )
-       ABORT("Malloc fails for work in sp_strsv().");
-    if ( lsame_(trans, "N") ) {        /* Form x := inv(A)*x. */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L)*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               nrow = nsupr - nsupc;
-               solve_ops += nsupc * (nsupc - 1);
-               solve_ops += 2 * nrow * nsupc;
-               if ( nsupc == 1 ) {
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       x[irow] -= x[fsupc] * Lval[luptr];
-                   }
-               } else {
-#ifdef _CRAY
-                   STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-                   strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-                   slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-                   smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; ++i, ++iptr) {
-                       irow = L_SUB(iptr);
-                       x[irow] -= work[i];     /* Scatter */
-                       work[i] = 0.0;
-                   }
-               }
-           } /* for k ... */
-       } else {
-           /* Form x := inv(U)*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; k--) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               solve_ops += nsupc * (nsupc + 1);
-               if ( nsupc == 1 ) {
-                   x[fsupc] /= Lval[luptr];
-                   for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-                       irow = U_SUB(i);
-                       x[irow] -= x[fsupc] * Uval[i];
-                   }
-               } else {
-#ifdef _CRAY
-                   STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-                      &x[fsupc], &incx);
-                   strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                   susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-                   for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                       solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                       for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-                               i++) {
-                           irow = U_SUB(i);
-                           x[irow] -= x[jcol] * Uval[i];
-                       }
-                    }
-               }
-           } /* for k ... */
-       }
-    } else { /* Form x := inv(A')*x */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L')*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               solve_ops += 2 * (nsupr - nsupc) * nsupc;
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                       x[jcol] -= x[irow] * Lval[i];
-                       iptr++;
-                   }
-               }
-               if ( nsupc > 1 ) {
-                   solve_ops += nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               }
-           }
-       } else {
-           /* Form x := inv(U')*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                       x[jcol] -= x[irow] * Uval[i];
-                   }
-               }
-               solve_ops += nsupc * (nsupc + 1);
-               if ( nsupc == 1 ) {
-                   x[fsupc] /= Lval[luptr];
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                   strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-               }
-           } /* for k ... */
-       }
-    }
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x, 
-        int incx, float beta, float *y, int incy)
-/*  Purpose   
-    =======   
-    sp_sgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-    Parameters   
-    ==========   
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-    ALPHA  - (input) float
-             On entry, ALPHA specifies the scalar alpha.   
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
-             In the future, more general A can be handled.
-    X      - (input) float*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-    BETA   - (input) float
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-    Y      - (output) float*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-    ==== Sparse Level 2 Blas routine.   
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    int info;
-    float temp;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)        info = 8;
-    if (info != 0) {
-       xerbla_("sp_sgemv ", &info);
-       return 0;
-    }
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
-       return 0;
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-       lenx = A->ncol;
-       leny = A->nrow;
-    } else {
-       lenx = A->nrow;
-       leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if (beta != 1.) {
-       if (incy == 1) {
-           if (beta == 0.)
-               for (i = 0; i < leny; ++i) y[i] = 0.;
-           else
-               for (i = 0; i < leny; ++i) y[i] = beta * y[i];
-       } else {
-           iy = ky;
-           if (beta == 0.)
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = 0.;
-                   iy += incy;
-               }
-           else
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = beta * y[iy];
-                   iy += incy;
-               }
-       }
-    }
-    if (alpha == 0.) return 0;
-    if ( notran ) {
-       /* Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (incy == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               if (x[jx] != 0.) {
-                   temp = alpha * x[jx];
-                   for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       y[irow] += temp * Aval[i];
-                   }
-               }
-               jx += incx;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    } else {
-       /* Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (incx == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               temp = 0.;
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   temp += Aval[i] * x[irow];
-               }
-               y[jy] += alpha * temp;
-               jy += incy;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    }
-    return 0;
-} /* sp_sgemv */
diff --git a/superlu/ssp_blas3.c b/superlu/ssp_blas3.c
deleted file mode 100644
index f958ac85..00000000
--- a/superlu/ssp_blas3.c
+++ /dev/null
@@ -1,140 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:          sp_blas3.c
- * Purpose:            Sparse BLAS3, using some dense BLAS3 operations.
- */
-#include "slu_sdefs.h"
-sp_sgemm(char *transa, char *transb, int m, int n, int k, 
-         float alpha, SuperMatrix *A, float *b, int ldb, 
-         float beta, float *c, int ldc)
-/*  Purpose   
-    =======   
-    sp_s performs one of the matrix-matrix operations   
-       C := alpha*op( A )*op( B ) + beta*C,   
-    where  op( X ) is one of 
-       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
-    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
-    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
-    Parameters   
-    ==========   
-    TRANSA - (input) char*
-             On entry, TRANSA specifies the form of op( A ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSA = 'N' or 'n',  op( A ) = A.   
-                TRANSA = 'T' or 't',  op( A ) = A'.   
-                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
-             Unchanged on exit.   
-    TRANSB - (input) char*
-             On entry, TRANSB specifies the form of op( B ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSB = 'N' or 'n',  op( B ) = B.   
-                TRANSB = 'T' or 't',  op( B ) = B'.   
-                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
-             Unchanged on exit.   
-    M      - (input) int   
-             On entry,  M  specifies  the number of rows of the matrix 
-            op( A ) and of the matrix C.  M must be at least zero. 
-            Unchanged on exit.   
-    N      - (input) int
-             On entry,  N specifies the number of columns of the matrix 
-            op( B ) and the number of columns of the matrix C. N must be 
-            at least zero.
-            Unchanged on exit.   
-    K      - (input) int
-             On entry, K specifies the number of columns of the matrix 
-            op( A ) and the number of rows of the matrix op( B ). K must 
-            be at least  zero.   
-             Unchanged on exit.
-    ALPHA  - (input) float
-             On entry, ALPHA specifies the scalar alpha.   
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
-             In the future, more general A can be handled.
-    B      - FLOAT PRECISION array of DIMENSION ( LDB, kb ), where kb is 
-             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
-             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
-             part of the array B must contain the matrix B, otherwise 
-             the leading n by k part of the array B must contain the 
-             matrix B.   
-             Unchanged on exit.   
-    LDB    - (input) int
-             On entry, LDB specifies the first dimension of B as declared 
-             in the calling (sub) program. LDB must be at least max( 1, n ).  
-             Unchanged on exit.   
-    BETA   - (input) float
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then C need not be set on input.   
-    C      - FLOAT PRECISION array of DIMENSION ( LDC, n ).   
-             Before entry, the leading m by n part of the array C must 
-             contain the matrix C,  except when beta is zero, in which 
-             case C need not be set on entry.   
-             On exit, the array C is overwritten by the m by n matrix 
-            ( alpha*op( A )*B + beta*C ).   
-    LDC    - (input) int
-             On entry, LDC specifies the first dimension of C as declared 
-             in the calling (sub)program. LDC must be at least max(1,m).   
-             Unchanged on exit.   
-    ==== Sparse Level 3 Blas routine.   
-    int    incx = 1, incy = 1;
-    int    j;
-    for (j = 0; j < n; ++j) {
-       sp_sgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
-    }
-    return 0;    
diff --git a/superlu/superlu_timer.c b/superlu/superlu_timer.c
deleted file mode 100644
index dedf19e9..00000000
--- a/superlu/superlu_timer.c
+++ /dev/null
@@ -1,76 +0,0 @@
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * Purpose
- * ======= 
- *     Returns the time in seconds used by the process.
- *
- * Note: the timer function call is machine dependent. Use conditional
- *       compilation to choose the appropriate function.
- *
- */
-#ifdef _MSC_VER
-#define NO_TIMER
-#ifdef SUN 
- *     It uses the system call gethrtime(3C), which is accurate to 
- *     nanoseconds. 
-#include <sys/time.h>
-double SuperLU_timer_() {
-    return ( (double)gethrtime() / 1e9 );
-#ifndef NO_TIMER
-#include <sys/types.h>
-#include <sys/times.h>
-#include <time.h>
-#include <sys/time.h>
-#ifndef CLK_TCK
-#define CLK_TCK 60
-double SuperLU_timer_()
-#ifdef NO_TIMER
-    /* no sys/times.h on WIN32 */
-    double tmp;
-    tmp = 0.0;
-    struct tms use;
-    double tmp;
-    times(&use);
-    tmp = use.tms_utime;
-    tmp += use.tms_stime;
-    return (double)(tmp) / CLK_TCK;
diff --git a/superlu/supermatrix.h b/superlu/supermatrix.h
deleted file mode 100644
index c3dd640b..00000000
--- a/superlu/supermatrix.h
+++ /dev/null
@@ -1,165 +0,0 @@
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#ifndef __SUPERLU_SUPERMATRIX /* allow multiple inclusions */
- * The matrix types are defined as follows. *
- ********************************************/
-typedef enum {
-    SLU_NC,    /* column-wise, no supernode */
-    SLU_NR,    /* row-wize, no supernode */
-    SLU_SC,    /* column-wise, supernode */
-    SLU_SR,    /* row-wise, supernode */
-    SLU_NCP,   /* column-wise, column-permuted, no supernode 
-                  (The consecutive columns of nonzeros, after permutation,
-                  may not be stored  contiguously.) */
-    SLU_DN     /* Fortran style column-wise storage for dense matrix */
-} Stype_t;
-typedef enum {
-    SLU_S,     /* single */
-    SLU_D,     /* double */
-    SLU_C,     /* single complex */
-    SLU_Z      /* double complex */
-} Dtype_t;
-typedef enum {
-    SLU_GE,    /* general */
-    SLU_TRLU,  /* lower triangular, unit diagonal */
-    SLU_TRUU,  /* upper triangular, unit diagonal */
-    SLU_TRL,   /* lower triangular */
-    SLU_TRU,   /* upper triangular */
-    SLU_SYL,   /* symmetric, store lower half */
-    SLU_SYU,   /* symmetric, store upper half */
-    SLU_HEL,   /* Hermitian, store lower half */
-    SLU_HEU    /* Hermitian, store upper half */
-} Mtype_t;
-typedef struct {
-       Stype_t Stype; /* Storage type: interprets the storage structure 
-                         pointed to by *Store. */
-       Dtype_t Dtype; /* Data type. */
-       Mtype_t Mtype; /* Matrix type: describes the mathematical property of 
-                         the matrix. */
-       int_t  nrow;   /* number of rows */
-       int_t  ncol;   /* number of columns */
-       void *Store;   /* pointer to the actual storage of the matrix */
-} SuperMatrix;
- * The storage schemes are defined as follows. *
- ***********************************************/
-/* Stype == NC (Also known as Harwell-Boeing sparse matrix format) */
-typedef struct {
-    int_t  nnz;            /* number of nonzeros in the matrix */
-    void   *nzval;  /* pointer to array of nonzero values, packed by column */
-    int_t  *rowind; /* pointer to array of row indices of the nonzeros */
-    int_t  *colptr; /* pointer to array of beginning of columns in nzval[] 
-                      and rowind[]  */
-                    /* Note:
-                      Zero-based indexing is used;
-                      colptr[] has ncol+1 entries, the last one pointing
-                      beyond the last column, so that colptr[ncol] = nnz. */
-} NCformat;
-/* Stype == NR (Also known as row compressed storage (RCS). */
-typedef struct {
-    int_t nnz;    /* number of nonzeros in the matrix */
-    void  *nzval;  /* pointer to array of nonzero values, packed by row */
-    int_t *colind; /* pointer to array of column indices of the nonzeros */
-    int_t *rowptr; /* pointer to array of beginning of rows in nzval[] 
-                      and colind[]  */
-                   /* Note:
-                     Zero-based indexing is used;
-                     nzval[] and colind[] are of the same length, nnz;
-                     rowptr[] has nrow+1 entries, the last one pointing
-                     beyond the last column, so that rowptr[nrow] = nnz. */
-} NRformat;
-/* Stype == SC */
-typedef struct {
-  int_t  nnz;       /* number of nonzeros in the matrix */
-  int_t  nsuper;     /* number of supernodes, minus 1 */
-  void *nzval;       /* pointer to array of nonzero values, packed by column */
-  int_t *nzval_colptr;/* pointer to array of beginning of columns in nzval[] */
-  int_t *rowind;     /* pointer to array of compressed row indices of 
-                       rectangular supernodes */
-  int_t *rowind_colptr;/* pointer to array of beginning of columns in rowind[] 
-  int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column 
-                       j belongs; mapping from column to supernode number. */
-  int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th 
-                       supernode; mapping from supernode number to column.
-                       e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12)
-                             sup_to_col: 0 1 2 4 7 12           (nsuper=4) */
-                     /* Note:
-                       Zero-based indexing is used;
-                       nzval_colptr[], rowind_colptr[], col_to_sup and
-                       sup_to_col[] have ncol+1 entries, the last one
-                       pointing beyond the last column.
-                       For col_to_sup[], only the first ncol entries are
-                       defined. For sup_to_col[], only the first nsuper+2
-                       entries are defined. */
-} SCformat;
-/* Stype == NCP */
-typedef struct {
-    int_t nnz;   /* number of nonzeros in the matrix */
-    void *nzval;  /* pointer to array of nonzero values, packed by column */
-    int_t *rowind;/* pointer to array of row indices of the nonzeros */
-                 /* Note: nzval[]/rowind[] always have the same length */
-    int_t *colbeg;/* colbeg[j] points to the beginning of column j in nzval[] 
-                     and rowind[]  */
-    int_t *colend;/* colend[j] points to one past the last element of column
-                    j in nzval[] and rowind[]  */
-                 /* Note:
-                    Zero-based indexing is used;
-                    The consecutive columns of the nonzeros may not be 
-                    contiguous in storage, because the matrix has been 
-                    postmultiplied by a column permutation matrix. */
-} NCPformat;
-/* Stype == DN */
-typedef struct {
-    int_t lda;    /* leading dimension */
-    void *nzval;  /* array of size lda*ncol to represent a dense matrix */
-} DNformat;
- * Macros used for easy access of sparse matrix entries. *
- *********************************************************/
-#define L_SUB_START(col)     ( Lstore->rowind_colptr[col] )
-#define L_SUB(ptr)           ( Lstore->rowind[ptr] )
-#define L_NZ_START(col)      ( Lstore->nzval_colptr[col] )
-#define L_FST_SUPC(superno)  ( Lstore->sup_to_col[superno] )
-#define U_NZ_START(col)      ( Ustore->colptr[col] )
-#define U_SUB(ptr)           ( Ustore->rowind[ptr] )
-#ifdef __cplusplus
-extern "C" 
-int handle_getfem_callback(); /* this one is in ../src/ */
-#endif  /* __SUPERLU_SUPERMATRIX */
diff --git a/superlu/sutil.c b/superlu/sutil.c
deleted file mode 100644
index 09b51d49..00000000
--- a/superlu/sutil.c
+++ /dev/null
@@ -1,478 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <math.h>
-#include "slu_sdefs.h"
-sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      float *nzval, int *rowind, int *colptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    NCformat *Astore;
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->rowind = rowind;
-    Astore->colptr = colptr;
-sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      float *nzval, int *colind, int *rowptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    NRformat *Astore;
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->colind = colind;
-    Astore->rowptr = rowptr;
-/* Copy matrix A into matrix B. */
-sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
-    NCformat *Astore, *Bstore;
-    int      ncol, nnz, i;
-    B->Stype = A->Stype;
-    B->Dtype = A->Dtype;
-    B->Mtype = A->Mtype;
-    B->nrow  = A->nrow;;
-    B->ncol  = ncol = A->ncol;
-    Astore   = (NCformat *) A->Store;
-    Bstore   = (NCformat *) B->Store;
-    Bstore->nnz = nnz = Astore->nnz;
-    for (i = 0; i < nnz; ++i)
-       ((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
-    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
-    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
-sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
-                   Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    DNformat    *Xstore;
-    X->Stype = stype;
-    X->Dtype = dtype;
-    X->Mtype = mtype;
-    X->nrow = m;
-    X->ncol = n;
-    X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
-    Xstore = (DNformat *) X->Store;
-    Xstore->lda = ldx;
-    Xstore->nzval = (float *) x;
-sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
-                       float *Y, int ldy)
- *
- *  Purpose
- *  =======
- *
- *  Copies a two-dimensional matrix X to another matrix Y.
- */
-    int    i, j;
-    for (j = 0; j < N; ++j)
-        for (i = 0; i < M; ++i)
-            Y[i + j*ldy] = X[i + j*ldx];
-sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
-                       float *nzval, int *nzval_colptr, int *rowind,
-                       int *rowind_colptr, int *col_to_sup, int *sup_to_col,
-                       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    SCformat *Lstore;
-    L->Stype = stype;
-    L->Dtype = dtype;
-    L->Mtype = mtype;
-    L->nrow = m;
-    L->ncol = n;
-    L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
-    if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
-    Lstore = L->Store;
-    Lstore->nnz = nnz;
-    Lstore->nsuper = col_to_sup[n];
-    Lstore->nzval = nzval;
-    Lstore->nzval_colptr = nzval_colptr;
-    Lstore->rowind = rowind;
-    Lstore->rowind_colptr = rowind_colptr;
-    Lstore->col_to_sup = col_to_sup;
-    Lstore->sup_to_col = sup_to_col;
- * Convert a row compressed storage into a column compressed storage.
- */
-sCompRow_to_CompCol(int m, int n, int nnz, 
-                   float *a, int *colind, int *rowptr,
-                   float **at, int **rowind, int **colptr)
-    register int i, j, col, relpos;
-    int *marker;
-    /* Allocate storage for another copy of the matrix. */
-    *at = (float *) floatMalloc(nnz);
-    *rowind = (int *) intMalloc(nnz);
-    *colptr = (int *) intMalloc(n+1);
-    marker = (int *) intCalloc(n);
-    /* Get counts of each column of A, and set up column pointers */
-    for (i = 0; i < m; ++i)
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
-    (*colptr)[0] = 0;
-    for (j = 0; j < n; ++j) {
-       (*colptr)[j+1] = (*colptr)[j] + marker[j];
-       marker[j] = (*colptr)[j];
-    }
-    /* Transfer the matrix into the compressed column storage. */
-    for (i = 0; i < m; ++i) {
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
-           col = colind[j];
-           relpos = marker[col];
-           (*rowind)[relpos] = i;
-           (*at)[relpos] = a[j];
-           ++marker[col];
-       }
-    }
-    SUPERLU_FREE(marker);
-sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
-    NCformat     *Astore;
-    register int i,n;
-    float       *dp;
-    printf("\nCompCol matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (NCformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
-    printf("nzval: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
-    printf("\ncolptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
-    printf("\n");
-    fflush(stdout);
-sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
-    SCformat     *Astore;
-    register int i, j, k, c, d, n, nsup;
-    float       *dp;
-    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
-    printf("\nSuperNode matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (SCformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    col_to_sup = Astore->col_to_sup;
-    sup_to_col = Astore->sup_to_col;
-    rowind_colptr = Astore->rowind_colptr;
-    rowind = Astore->rowind;
-    printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 
-          A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
-    printf("nzval:\n");
-    for (k = 0; k <= Astore->nsuper; ++k) {
-      c = sup_to_col[k];
-      nsup = sup_to_col[k+1] - c;
-      for (j = c; j < c + nsup; ++j) {
-       d = Astore->nzval_colptr[j];
-       for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-         printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
-       }
-      }
-    }
-#if 0
-    for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nnzval_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->rowind_colptr[n]; ++i) 
-        printf("%d  ", Astore->rowind[i]);
-    printf("\nrowind_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
-    printf("\ncol_to_sup: ");
-    for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
-    printf("\nsup_to_col: ");
-    for (i = 0; i <= Astore->nsuper+1; ++i) 
-        printf("%d  ", sup_to_col[i]);
-    printf("\n");
-    fflush(stdout);
-sPrint_Dense_Matrix(char *what, SuperMatrix *A)
-    DNformat     *Astore;
-    register int i, j, lda = Astore->lda;
-    float       *dp;
-    printf("\nDense matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    Astore = (DNformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
-    printf("\nnzval: ");
-    for (j = 0; j < A->ncol; ++j) {
-        for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i + j*lda]);
-        printf("\n");
-    }
-    printf("\n");
-    fflush(stdout);
- * Diagnostic print of column "jcol" in the U/L factor.
- */
-sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
-    int     i, k, fsupc;
-    int     *xsup, *supno;
-    int     *xlsub, *lsub;
-    float  *lusup;
-    int     *xlusup;
-    float  *ucol;
-    int     *usub, *xusub;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    printf("%s", msg);
-    printf("col %d: pivrow %d, supno %d, xprune %d\n", 
-          jcol, pivrow, supno[jcol], xprune[jcol]);
-    printf("\tU-col:\n");
-    for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
-       printf("\t%d%10.4f\n", usub[i], ucol[i]);
-    printf("\tL-col in rectangular snode:\n");
-    fsupc = xsup[supno[jcol]]; /* first col of the snode */
-    i = xlsub[fsupc];
-    k = xlusup[jcol];
-    while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
-       printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
-       i++; k++;
-    }
-    fflush(stdout);
- * Check whether tempv[] == 0. This should be true before and after 
- * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
- */
-void scheck_tempv(int n, float *tempv)
-    int i;
-    for (i = 0; i < n; i++) {
-       if (tempv[i] != 0.0) 
-       {
-           fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
-           ABORT("scheck_tempv");
-       }
-    }
-sGenXtrue(int n, int nrhs, float *x, int ldx)
-    int  i, j;
-    for (j = 0; j < nrhs; ++j)
-       for (i = 0; i < n; ++i) {
-           x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
-       }
- * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
- */
-sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
-         SuperMatrix *A, SuperMatrix *B)
-    NCformat *Astore;
-    float   *Aval;
-    DNformat *Bstore;
-    float   *rhs;
-    float one = 1.0;
-    float zero = 0.0;
-    int      ldc;
-    char transc[1];
-    Astore = A->Store;
-    Aval   = (float *) Astore->nzval;
-    Bstore = B->Store;
-    rhs    = Bstore->nzval;
-    ldc    = Bstore->lda;
-    if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
-    else *(unsigned char *)transc = 'T';
-    sp_sgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
-            x, ldx, zero, rhs, ldc);
- * Fills a float precision array with a given value.
- */
-sfill(float *a, int alen, float dval)
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = dval;
- * Check the inf-norm of the error vector 
- */
-void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
-    DNformat *Xstore;
-    float err, xnorm;
-    float *Xmat, *soln_work;
-    int i, j;
-    Xstore = X->Store;
-    Xmat = Xstore->nzval;
-    for (j = 0; j < nrhs; j++) {
-      soln_work = &Xmat[j*Xstore->lda];
-      err = xnorm = 0.0;
-      for (i = 0; i < X->nrow; i++) {
-       err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
-       xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
-      }
-      err = err / xnorm;
-      printf("||X - Xtrue||/||X|| = %e\n", err);
-    }
-/* Print performance of the code. */
-sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
-           float rpg, float rcond, float *ferr,
-           float *berr, char *equed, SuperLUStat_t *stat)
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *utime;
-    flops_t  *ops;
-    utime = stat->utime;
-    ops   = stat->ops;
-    if ( utime[FACT] != 0. )
-       printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-              ops[FACT]*1e-6/utime[FACT]);
-    printf("Identify relaxed snodes    = %8.2f\n", utime[RELAX]);
-    if ( utime[SOLVE] != 0. )
-       printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
-              ops[SOLVE]*1e-6/utime[SOLVE]);
-    Lstore = (SCformat *) L->Store;
-    Ustore = (NCformat *) U->Store;
-    printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
-    printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
-    printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-          mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
-          mem_usage->expansions);
-    printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
-    printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
-          utime[FACT], ops[FACT]*1e-6/utime[FACT],
-          utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
-          utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
-    printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
-    printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
-          rpg, rcond, ferr[0], berr[0], equed);
-int print_float_vec(char *what, int n, float *vec)
-    int i;
-    printf("%s: n %d\n", what, n);
-    for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
-    return 0;
diff --git a/superlu/util.c b/superlu/util.c
deleted file mode 100644
index c803162b..00000000
--- a/superlu/util.c
+++ /dev/null
@@ -1,405 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <math.h>
-#include "slu_ddefs.h"
- * Global statistics variale
- */
-void superlu_abort_and_exit(char* msg)
-  fprintf(stderr, "%s",  msg);
-  exit (-1);
- * Set the default values for the options argument.
- */
-void set_default_options(superlu_options_t *options)
-    options->Fact = DOFACT;
-    options->Equil = YES;
-    options->ColPerm = COLAMD;
-    options->DiagPivotThresh = 1.0;
-    options->Trans = NOTRANS;
-    options->IterRefine = NOREFINE;
-    options->SymmetricMode = NO;
-    options->PivotGrowth = NO;
-    options->ConditionNumber = NO;
-    options->PrintStat = YES;
- * Print the options setting.
- */
-void print_options(superlu_options_t *options)
-    printf(".. options:\n");
-    printf("\tFact\t %8d\n", options->Fact);
-    printf("\tEquil\t %8d\n", options->Equil);
-    printf("\tColPerm\t %8d\n", options->ColPerm);
-    printf("\tDiagPivotThresh %8.4f\n", options->DiagPivotThresh);
-    printf("\tTrans\t %8d\n", options->Trans);
-    printf("\tIterRefine\t%4d\n", options->IterRefine);
-    printf("\tSymmetricMode\t%4d\n", options->SymmetricMode);
-    printf("\tPivotGrowth\t%4d\n", options->PivotGrowth);
-    printf("\tConditionNumber\t%4d\n", options->ConditionNumber);
-    printf("..\n");
-/* Deallocate the structure pointing to the actual storage of the matrix. */
-Destroy_SuperMatrix_Store(SuperMatrix *A)
-    SUPERLU_FREE ( A->Store );
-Destroy_CompCol_Matrix(SuperMatrix *A)
-    SUPERLU_FREE( ((NCformat *)A->Store)->rowind );
-    SUPERLU_FREE( ((NCformat *)A->Store)->colptr );
-    SUPERLU_FREE( ((NCformat *)A->Store)->nzval );
-    SUPERLU_FREE( A->Store );
-Destroy_CompRow_Matrix(SuperMatrix *A)
-    SUPERLU_FREE( ((NRformat *)A->Store)->colind );
-    SUPERLU_FREE( ((NRformat *)A->Store)->rowptr );
-    SUPERLU_FREE( ((NRformat *)A->Store)->nzval );
-    SUPERLU_FREE( A->Store );
-Destroy_SuperNode_Matrix(SuperMatrix *A)
-    SUPERLU_FREE ( ((SCformat *)A->Store)->rowind );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->nzval );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col );
-    SUPERLU_FREE ( A->Store );
-/* A is of type Stype==NCP */
-Destroy_CompCol_Permuted(SuperMatrix *A)
-    SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg );
-    SUPERLU_FREE ( ((NCPformat *)A->Store)->colend );
-    SUPERLU_FREE ( A->Store );
-/* A is of type Stype==DN */
-Destroy_Dense_Matrix(SuperMatrix *A)
-    DNformat* Astore = A->Store;
-    SUPERLU_FREE (Astore->nzval);
-    SUPERLU_FREE ( A->Store );
- * Reset repfnz[] for the current column 
- */
-resetrep_col (const int nseg, const int *segrep, int *repfnz)
-    int i, irep;
-    for (i = 0; i < nseg; i++) {
-       irep = segrep[i];
-       repfnz[irep] = EMPTY;
-    }
- * Count the total number of nonzeros in factors L and U,  and in the 
- * symmetrically reduced L. 
- */
-countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu)
-    int          nsuper, fsupc, i, j;
-    int          nnzL0, jlen, irep;
-    int          *xsup, *xlsub;
-    xsup   = Glu->xsup;
-    xlsub  = Glu->xlsub;
-    *nnzL  = 0;
-    *nnzU  = (Glu->xusub)[n];
-    nnzL0  = 0;
-    nsuper = (Glu->supno)[n];
-    if ( n <= 0 ) return;
-    /* 
-     * For each supernode
-     */
-    for (i = 0; i <= nsuper; i++) {
-       fsupc = xsup[i];
-       jlen = xlsub[fsupc+1] - xlsub[fsupc];
-       for (j = fsupc; j < xsup[i+1]; j++) {
-           *nnzL += jlen;
-           *nnzU += j - fsupc + 1;
-           jlen--;
-       }
-       irep = xsup[i+1] - 1;
-       nnzL0 += xprune[irep] - xlsub[irep];
-    }
-    /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/
- * Fix up the data storage lsub for L-subscripts. It removes the subscript
- * sets for structural pruning,        and applies permuation to the remaining
- * subscripts.
- */
-fixupL(const int n, const int *perm_r, GlobalLU_t *Glu)
-    register int nsuper, fsupc, nextl, i, j, k, jstrt;
-    int          *xsup, *lsub, *xlsub;
-    if ( n <= 1 ) return;
-    xsup   = Glu->xsup;
-    lsub   = Glu->lsub;
-    xlsub  = Glu->xlsub;
-    nextl  = 0;
-    nsuper = (Glu->supno)[n];
-    /* 
-     * For each supernode ...
-     */
-    for (i = 0; i <= nsuper; i++) {
-       fsupc = xsup[i];
-       jstrt = xlsub[fsupc];
-       xlsub[fsupc] = nextl;
-       for (j = jstrt; j < xlsub[fsupc+1]; j++) {
-           lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */
-           nextl++;
-       }
-       for (k = fsupc+1; k < xsup[i+1]; k++) 
-               xlsub[k] = nextl;       /* Other columns in supernode i */
-    }
-    xlsub[n] = nextl;
- * Diagnostic print of segment info after panel_dfs().
- */
-void print_panel_seg(int n, int w, int jcol, int nseg, 
-                    int *segrep, int *repfnz)
-    int j, k;
-    for (j = jcol; j < jcol+w; j++) {
-       printf("\tcol %d:\n", j);
-       for (k = 0; k < nseg; k++)
-           printf("\t\tseg %d, segrep %d, repfnz %d\n", k, 
-                       segrep[k], repfnz[(j-jcol)*n + segrep[k]]);
-    }
-StatInit(SuperLUStat_t *stat)
-    register int i, w, panel_size, relax;
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-    w = SUPERLU_MAX(panel_size, relax);
-    stat->panel_histo = intCalloc(w+1);
-    stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double));
-    if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime");
-    stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t));
-    if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops");
-    for (i = 0; i < NPHASES; ++i) {
-        stat->utime[i] = 0.;
-        stat->ops[i] = 0.;
-    }
-StatPrint(SuperLUStat_t *stat)
-    double         *utime;
-    flops_t        *ops;
-    utime = stat->utime;
-    ops   = stat->ops;
-    printf("Factor time  = %8.2f\n", utime[FACT]);
-    if ( utime[FACT] != 0.0 )
-      printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-            ops[FACT]*1e-6/utime[FACT]);
-    printf("Solve time   = %8.2f\n", utime[SOLVE]);
-    if ( utime[SOLVE] != 0.0 )
-      printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE],
-            ops[SOLVE]*1e-6/utime[SOLVE]);
-StatFree(SuperLUStat_t *stat)
-    SUPERLU_FREE(stat->panel_histo);
-    SUPERLU_FREE(stat->utime);
-    SUPERLU_FREE(stat->ops);
-LUFactFlops(SuperLUStat_t *stat)
-    return (stat->ops[FACT]);
-LUSolveFlops(SuperLUStat_t *stat)
-    return (stat->ops[SOLVE]);
- * Fills an integer array with a given value.
- */
-void ifill(int *a, int alen, int ival)
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = ival;
- * Get the statistics of the supernodes 
- */
-#define NBUCKS 10
-static         int     max_sup_size;
-void super_stats(int nsuper, int *xsup)
-    register int nsup1 = 0;
-    int          i, isize, whichb, bl, bh;
-    int          bucket[NBUCKS];
-    max_sup_size = 0;
-    for (i = 0; i <= nsuper; i++) {
-       isize = xsup[i+1] - xsup[i];
-       if ( isize == 1 ) nsup1++;
-       if ( max_sup_size < isize ) max_sup_size = isize;       
-    }
-    printf("    Supernode statistics:\n\tno of super = %d\n", nsuper+1);
-    printf("\tmax supernode size = %d\n", max_sup_size);
-    printf("\tno of size 1 supernodes = %d\n", nsup1);
-    /* Histogram of the supernode sizes */
-    ifill (bucket, NBUCKS, 0);
-    for (i = 0; i <= nsuper; i++) {
-        isize = xsup[i+1] - xsup[i];
-        whichb = (float) isize / max_sup_size * NBUCKS;
-        if (whichb >= NBUCKS) whichb = NBUCKS - 1;
-        bucket[whichb]++;
-    }
-    printf("\tHistogram of supernode sizes:\n");
-    for (i = 0; i < NBUCKS; i++) {
-        bl = (float) i * max_sup_size / NBUCKS;
-        bh = (float) (i+1) * max_sup_size / NBUCKS;
-        printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
-    }
-float SpaSize(int n, int np, float sum_npw)
-    return (sum_npw*8 + np*8 + n*4)/1024.;
-float DenseSize(int n, float sum_nw)
-    return (sum_nw*8 + n*8)/1024.;;
- * Check whether repfnz[] == EMPTY after reset.
- */
-void check_repfnz(int n, int w, int jcol, int *repfnz)
-    int jj, k;
-    for (jj = jcol; jj < jcol+w; jj++) 
-       for (k = 0; k < n; k++)
-           if ( repfnz[(jj-jcol)*n + k] != EMPTY ) {
-               fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj,
-                       k, repfnz[(jj-jcol)*n + k]);
-               ABORT("check_repfnz");
-           }
-/* Print a summary of the testing results. */
-PrintSumm(char *type, int nfail, int nrun, int nerrs)
-    if ( nfail > 0 )
-       printf("%3s driver: %d out of %d tests failed to pass the threshold\n",
-              type, nfail, nrun);
-    else
-       printf("All tests for %3s driver passed the threshold (%6d tests run)\n", type, nrun);
-    if ( nerrs > 0 )
-       printf("%6d error messages recorded\n", nerrs);
-int print_int_vec(char *what, int n, int *vec)
-    int i;
-    printf("%s\n", what);
-    for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]);
-    return 0;
diff --git a/superlu/xerbla.c b/superlu/xerbla.c
deleted file mode 100644
index c7995cb9..00000000
--- a/superlu/xerbla.c
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <stdio.h>
-#include "slu_Cnames.h"
-/* Subroutine */ int xerbla_(char *srname, int *info)
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Copyright (c) 1992-2013 The University of Tennessee and The University
-                        of Tennessee Research Foundation.  All rights
-                        reserved.
-       Copyright (c) 2000-2013 The University of California Berkeley. All
-                        rights reserved.
-       Copyright (c) 2006-2013 The University of Colorado Denver.  All rights
-                        reserved.
-       Redistribution and use in source and binary forms, with or without
-       modification, are permitted provided that the following conditions are
-       met:
-       - Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-       - Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer listed
-         in this license in the documentation and/or other materials
-         provided with the distribution.
-       - Neither the name of the copyright holders nor the names of its
-         contributors may be used to endorse or promote products derived from
-         this software without specific prior written permission.
-       The copyright holders provide no reassurances that the source code
-       provided does not infringe any patent, copyright, or any other
-       intellectual property rights of third parties.  The copyright holders
-       disclaim any liability to any recipient for claims brought against
-       recipient by any third party for infringement of that parties
-       intellectual property rights.
-    Purpose   
-    =======   
-    XERBLA  is an error handler for the LAPACK routines.   
-    It is called by an LAPACK routine if an input parameter has an   
-    invalid value.  A message is printed and execution stops.   
-    Installers may consider modifying the STOP statement in order to   
-    call system-specific exception-handling facilities.   
-    Arguments   
-    =========   
-    SRNAME  (input) CHARACTER*6   
-            The name of the routine which called XERBLA.   
-    INFO    (input) INT   
-            The position of the invalid parameter in the parameter list   
-            of the calling routine.   
-   ===================================================================== 
-    printf("** On entry to %6s, parameter number %2d had an illegal value\n",
-               srname, *info);
-/*     End of XERBLA */
-    return 0;
-} /* xerbla_ */
diff --git a/superlu/zcolumn_bmod.c b/superlu/zcolumn_bmod.c
deleted file mode 100644
index f5168ba5..00000000
--- a/superlu/zcolumn_bmod.c
+++ /dev/null
@@ -1,363 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_zdefs.h"
-extern void ztrsv_();
-extern void zgemv_();
- * Function prototypes 
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-/* Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-zcolumn_bmod (
-            const int  jcol,     /* in */
-            const int  nseg,     /* in */
-            doublecomplex     *dense,    /* in */
-            doublecomplex     *tempv,    /* working array */
-            int        *segrep,  /* in */
-            int        *repfnz,  /* in */
-            int        fpanelc,  /* in -- first column in the current panel */
-            GlobalLU_t *Glu,     /* modified */
-            SuperLUStat_t *stat  /* output */
-            )
- * Purpose:
- * ========
- *    Performs numeric block updates (sup-col) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-    int         incx = 1, incy = 1;
-    doublecomplex      alpha, beta;
-    /* krep = representative of current k-th supernode
-     * fsupc = first supernodal column
-     * nsupc = no of columns in supernode
-     * nsupr = no of rows in supernode (used as leading dimension)
-     * luptr = location of supernodal LU-block in storage
-     * kfnz = first nonz in the k-th supernodal segment
-     * no_zeros = no of leading zeros in a supernodal U-segment
-     */
-    doublecomplex       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          fsupc, nsupc, nsupr, segsze;
-    int          nrow;   /* No of rows in the matrix of matrix-vector */
-    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
-    register int lptr, kfnz, isub, irow, i;
-    register int no_zeros, new_next; 
-    int          ufirst, nextlu;
-    int          fst_col; /* First column within small LU update */
-    int          d_fsupc; /* Distance between the first column of the current
-                            panel and the first column of the current snode. */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    doublecomplex       *lusup;
-    int          *xlusup;
-    int          nzlumax;
-    doublecomplex       *tempv1;
-    doublecomplex      zero = {0.0, 0.0};
-    doublecomplex      one = {1.0, 0.0};
-    doublecomplex      none = {-1.0, 0.0};
-    doublecomplex       comp_temp, comp_temp1;
-    int          mem_error;
-    flops_t      *ops = stat->ops;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nzlumax = Glu->nzlumax;
-    jcolp1 = jcol + 1;
-    jsupno = supno[jcol];
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k];
-       k--;
-       ksupno = supno[krep];
-       if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
-           fsupc = xsup[ksupno];
-           fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-           /* Distance from the current supernode to the current panel; 
-              d_fsupc=0 if fsupc > fpanelc. */
-           d_fsupc = fst_col - fsupc; 
-           luptr = xlusup[fst_col] + d_fsupc;
-           lptr = xlsub[fsupc] + d_fsupc;
-           kfnz = repfnz[krep];
-           kfnz = SUPERLU_MAX ( kfnz, fpanelc );
-           segsze = krep - kfnz + 1;
-           nsupc = krep - fst_col + 1;
-           nsupr = xlsub[fsupc+1] - xlsub[fsupc];      /* Leading dimension */
-           nrow = nsupr - d_fsupc - nsupc;
-           krep_ind = lptr + nsupc - 1;
-           ops[TRSV] += 4 * segsze * (segsze - 1);
-           ops[GEMV] += 8 * nrow * segsze;
-           /* 
-            * Case 1: Update U-segment of size 1 -- col-col update 
-            */
-           if ( segsze == 1 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc;
-               for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                   irow = lsub[i];
-                   zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                   z_sub(&dense[irow], &dense[irow], &comp_temp);
-                   luptr++;
-               }
-           } else if ( segsze <= 3 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc-1;
-               ukj1 = dense[lsub[krep_ind - 1]];
-               luptr1 = luptr - nsupr;
-               if ( segsze == 2 ) { /* Case 2: 2cols-col update */
-                   zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                   z_sub(&ukj, &ukj, &comp_temp);
-                   dense[lsub[krep_ind]] = ukj;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       z_sub(&dense[irow], &dense[irow], &comp_temp);
-                   }
-               } else { /* Case 3: 3cols-col update */
-                   ukj2 = dense[lsub[krep_ind - 2]];
-                   luptr2 = luptr1 - nsupr;
-                   zz_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                   z_sub(&ukj1, &ukj1, &comp_temp);
-                   zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                   zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                   z_add(&comp_temp, &comp_temp, &comp_temp1);
-                   z_sub(&ukj, &ukj, &comp_temp);
-                   dense[lsub[krep_ind]] = ukj;
-                   dense[lsub[krep_ind-1]] = ukj1;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       luptr2++;
-                       zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       z_sub(&dense[irow], &dense[irow], &comp_temp);
-                   }
-               }
-           } else {
-               /*
-                * Case: sup-col update
-                * Perform a triangular solve and block update,
-                * then scatter the result of sup-col update to dense
-                */
-               no_zeros = kfnz - fst_col;
-               /* Copy U[*,j] segment from dense[*] to tempv[*] */
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   tempv[i] = dense[irow];
-                   ++isub; 
-               }
-               /* Dense triangular solve -- start effective triangle */
-               luptr += nsupr * no_zeros + no_zeros; 
-#ifdef _CRAY
-               CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-               ztrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-                alpha = one;
-                beta = zero;
-#ifdef _CRAY
-               CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-               zgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-               zlsolve ( nsupr, segsze, &lusup[luptr], tempv );
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-               zmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
-                /* Scatter tempv[] into SPA dense[] as a temporary storage */
-                isub = lptr + no_zeros;
-                for (i = 0; i < segsze; i++) {
-                    irow = lsub[isub];
-                    dense[irow] = tempv[i];
-                    tempv[i] = zero;
-                    ++isub;
-                }
-               /* Scatter tempv1[] into SPA dense[] */
-               for (i = 0; i < nrow; i++) {
-                   irow = lsub[isub];
-                   z_sub(&dense[irow], &dense[irow], &tempv1[i]);
-                   tempv1[i] = zero;
-                   ++isub;
-               }
-           }
-       } /* if jsupno ... */
-    } /* for each segment... */
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    nextlu = xlusup[jcol];
-    fsupc = xsup[jsupno];
-    /* Copy the SPA dense into L\U[*,j] */
-    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
-    while ( new_next > nzlumax ) {
-       if (mem_error = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
-           return (mem_error);
-       lusup = Glu->lusup;
-       lsub = Glu->lsub;
-    }
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = zero;
-       ++nextlu;
-    }
-    xlusup[jcolp1] = nextlu;   /* Close L\U[*,jcol] */
-    /* For more updates within the panel (also within the current supernode), 
-     * should start from the first column of the panel, or the first column 
-     * of the supernode, whichever is bigger. There are 2 cases:
-     *    1) fsupc < fpanelc, then fst_col := fpanelc
-     *    2) fsupc >= fpanelc, then fst_col := fsupc
-     */
-    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-    if ( fst_col < jcol ) {
-       /* Distance between the current supernode and the current panel.
-          d_fsupc=0 if fsupc >= fpanelc. */
-       d_fsupc = fst_col - fsupc;
-       lptr = xlsub[fsupc] + d_fsupc;
-       luptr = xlusup[fst_col] + d_fsupc;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
-       nsupc = jcol - fst_col; /* Excluding jcol */
-       nrow = nsupr - d_fsupc - nsupc;
-       /* Points to the beginning of jcol in snode L\U(jsupno) */
-       ufirst = xlusup[jcol] + d_fsupc;        
-       ops[TRSV] += 4 * nsupc * (nsupc - 1);
-       ops[GEMV] += 8 * nrow * nsupc;
-#ifdef _CRAY
-       CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-       ztrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-       alpha = none; beta = one; /* y := beta*y + alpha*A*x */
-#ifdef _CRAY
-       CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       zgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       zlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       zmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
-               &lusup[ufirst], tempv );
-        /* Copy updates from tempv[*] into lusup[*] */
-       isub = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           z_sub(&lusup[isub], &lusup[isub], &tempv[i]);
-           tempv[i] = zero;
-           ++isub;
-       }
-    } /* if fst_col < jcol ... */ 
-    return 0;
diff --git a/superlu/zcolumn_dfs.c b/superlu/zcolumn_dfs.c
deleted file mode 100644
index 0c7f5a07..00000000
--- a/superlu/zcolumn_dfs.c
+++ /dev/null
@@ -1,266 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_zdefs.h"
-/* What type of supernodes we want */
-#define T2_SUPER
-          const int  m,         /* in - number of rows in the matrix */
-          const int  jcol,      /* in */
-          int        *perm_r,   /* in */
-          int        *nseg,     /* modified - with new segments appended */
-          int        *lsub_col, /* in - defines the RHS vector to start the dfs */
-          int        *segrep,   /* modified - with new segments appended */
-          int        *repfnz,   /* modified */
-          int        *xprune,   /* modified */
-          int        *marker,   /* modified */
-          int        *parent,   /* working array */
-          int        *xplore,   /* working array */
-          GlobalLU_t *Glu       /* modified */
-          )
- * Purpose
- * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
- *   decide the supernode boundary.
- *
- *   This routine does not use numeric values, but only use the RHS 
- *   row indices to start the dfs.
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives. The routine returns a list of such supernodal 
- *   representatives in topological order of the dfs that generates them.
- *   The location of the first nonzero in each such supernodal segment
- *   (supernodal entry location) is also returned.
- *
- * Local parameters
- * ================
- *   nseg: no of segments in current U[*,j]
- *   jsuper: jsuper=EMPTY if column j does not belong to the same
- *     supernode as j-1. Otherwise, jsuper=nsuper.
- *
- *   marker2: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- * Return value
- * ============
- *     0  success;
- *   > 0  number of bytes allocated when run out of space.
- *
- */
-    int     jcolp1, jcolm1, jsuper, nsuper, nextl;
-    int     k, krep, krow, kmark, kperm;
-    int     *marker2;           /* Used for small panel LU */
-    int            fsupc;              /* First column of a snode */
-    int     myfnz;             /* First nonz column of a U-segment */
-    int            chperm, chmark, chrep, kchild;
-    int     xdfs, maxdfs, kpar, oldrep;
-    int     jptr, jm1ptr;
-    int     ito, ifrom, istop; /* Used to compress row subscripts */
-    int     mem_error;
-    int     *xsup, *supno, *lsub, *xlsub;
-    int     nzlmax;
-    static  int  first = 1, maxsuper;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       first = 0;
-    }
-    jcolp1  = jcol + 1;
-    jcolm1  = jcol - 1;
-    nsuper  = supno[jcol];
-    jsuper  = nsuper;
-    nextl   = xlsub[jcol];
-    marker2 = &marker[2*m];
-    /* For each nonzero in A[*,jcol] do dfs */
-    for (k = 0; lsub_col[k] != EMPTY; k++) {
-       krow = lsub_col[k];
-       lsub_col[k] = EMPTY;
-       kmark = marker2[krow];          
-       /* krow was visited before, go to the next nonz */
-        if ( kmark == jcol ) continue; 
-       /* For each unmarked nbr krow of jcol
-        *      krow is in L: place it in structure of L[*,jcol]
-        */
-       marker2[krow] = jcol;
-       kperm = perm_r[krow];
-       if ( kperm == EMPTY ) {
-           lsub[nextl++] = krow;       /* krow is indexed into A */
-           if ( nextl >= nzlmax ) {
-               if ( mem_error = zLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                   return (mem_error);
-               lsub = Glu->lsub;
-           }
-            if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing */
-       } else {
-           /*  krow is in U: if its supernode-rep krep
-            *  has been explored, update repfnz[*]
-            */
-           krep = xsup[supno[kperm]+1] - 1;
-           myfnz = repfnz[krep];
-           if ( myfnz != EMPTY ) {     /* Visited before */
-               if ( myfnz > kperm ) repfnz[krep] = kperm;
-               /* continue; */
-           }
-           else {
-               /* Otherwise, perform dfs starting at krep */
-               oldrep = EMPTY;
-               parent[krep] = oldrep;
-               repfnz[krep] = kperm;
-               xdfs = xlsub[krep];
-               maxdfs = xprune[krep];
-               do {
-                   /* 
-                    * For each unmarked kchild of krep 
-                    */
-                   while ( xdfs < maxdfs ) {
-                       kchild = lsub[xdfs];
-                       xdfs++;
-                       chmark = marker2[kchild];
-                       if ( chmark != jcol ) { /* Not reached yet */
-                           marker2[kchild] = jcol;
-                           chperm = perm_r[kchild];
-                           /* Case kchild is in L: place it in L[*,k] */
-                           if ( chperm == EMPTY ) {
-                               lsub[nextl++] = kchild;
-                               if ( nextl >= nzlmax ) {
-                                   if ( mem_error = zLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) )
-                                       return (mem_error);
-                                   lsub = Glu->lsub;
-                               }
-                               if ( chmark != jcolm1 ) jsuper = EMPTY;
-                           } else {
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               chrep = xsup[supno[chperm]+1] - 1;
-                               myfnz = repfnz[chrep];
-                               if ( myfnz != EMPTY ) { /* Visited before */
-                                   if ( myfnz > chperm )
-                                       repfnz[chrep] = chperm;
-                               } else {
-                                   /* Continue dfs at super-rep of kchild */
-                                   xplore[krep] = xdfs;        
-                                   oldrep = krep;
-                                   krep = chrep; /* Go deeper down G(L^t) */
-                                   parent[krep] = oldrep;
-                                   repfnz[krep] = chperm;
-                                   xdfs = xlsub[krep];     
-                                   maxdfs = xprune[krep];
-                               } /* else */
-                          } /* else */
-                       } /* if */
-                   } /* while */
-                   /* krow has no more unexplored nbrs;
-                    *    place supernode-rep krep in postorder DFS.
-                    *    backtrack dfs to its parent
-                    */
-                   segrep[*nseg] = krep;
-                   ++(*nseg);
-                   kpar = parent[krep]; /* Pop from stack, mimic recursion */
-                   if ( kpar == EMPTY ) break; /* dfs done */
-                   krep = kpar;
-                   xdfs = xplore[krep];
-                   maxdfs = xprune[krep];
-               } while ( kpar != EMPTY );      /* Until empty stack */
-           } /* else */
-       } /* else */
-    } /* for each nonzero ... */
-    /* Check to see if j belongs in the same supernode as j-1 */
-    if ( jcol == 0 ) { /* Do nothing for column 0 */
-       nsuper = supno[0] = 0;
-    } else {
-       fsupc = xsup[nsuper];
-       jptr = xlsub[jcol];     /* Not compressed yet */
-       jm1ptr = xlsub[jcolm1];
-#ifdef T2_SUPER
-       if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
-       /* Make sure the number of columns in a supernode doesn't
-          exceed threshold. */
-       if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
-       /* If jcol starts a new supernode, reclaim storage space in
-        * lsub from the previous supernode. Note we only store
-        * the subscript set of the first and last columns of
-        * a supernode. (first for num values, last for pruning)
-        */
-       if ( jsuper == EMPTY ) {        /* starts a new supernode */
-           if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
-               printf("  Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
-               ito = xlsub[fsupc+1];
-               xlsub[jcolm1] = ito;
-               istop = ito + jptr - jm1ptr;
-               xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
-               xlsub[jcol] = istop;
-               for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
-                   lsub[ito] = lsub[ifrom];
-               nextl = ito;            /* = istop + length(jcol) */
-           }
-           nsuper++;
-           supno[jcol] = nsuper;
-       } /* if a new supernode */
-    }  /* else: jcol > 0 */ 
-    /* Tidy up the pointers before exit */
-    xsup[nsuper+1] = jcolp1;
-    supno[jcolp1]  = nsuper;
-    xprune[jcol]   = nextl;    /* Initialize upper bound for pruning */
-    xlsub[jcolp1]  = nextl;
-    return 0;
diff --git a/superlu/zcopy_to_ucol.c b/superlu/zcopy_to_ucol.c
deleted file mode 100644
index 375a50bc..00000000
--- a/superlu/zcopy_to_ucol.c
+++ /dev/null
@@ -1,112 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_zdefs.h"
-             int        jcol,    /* in */
-             int        nseg,    /* in */
-             int        *segrep,  /* in */
-             int        *repfnz,  /* in */
-             int        *perm_r,  /* in */
-             doublecomplex     *dense,   /* modified - reset to zero on return */
-             GlobalLU_t *Glu      /* modified */
-             )
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int ksub, krep, ksupno;
-    int i, k, kfnz, segsze;
-    int fsupc, isub, irow;
-    int jsupno, nextu;
-    int new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    doublecomplex    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-    doublecomplex zero = {0.0, 0.0};
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k--];
-       ksupno = supno[krep];
-       if ( ksupno != jsupno ) { /* Should go into ucol[] */
-           kfnz = repfnz[krep];
-           if ( kfnz != EMPTY ) {      /* Nonzero U-segment */
-               fsupc = xsup[ksupno];
-               isub = xlsub[fsupc] + kfnz - fsupc;
-               segsze = krep - kfnz + 1;
-               new_next = nextu + segsze;
-               while ( new_next > nzumax ) {
-                   if (mem_error = zLUMemXpand(jcol, nextu, UCOL, &nzumax, Glu))
-                       return (mem_error);
-                   ucol = Glu->ucol;
-                   if (mem_error = zLUMemXpand(jcol, nextu, USUB, &nzumax, Glu))
-                       return (mem_error);
-                   usub = Glu->usub;
-                   lsub = Glu->lsub;
-               }
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   usub[nextu] = perm_r[irow];
-                   ucol[nextu] = dense[irow];
-                   dense[irow] = zero;
-                   nextu++;
-                   isub++;
-               } 
-           }
-       }
-    } /* for each segment... */
-    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
-    return 0;
diff --git a/superlu/zgscon.c b/superlu/zgscon.c
deleted file mode 100644
index 2014c9c0..00000000
--- a/superlu/zgscon.c
+++ /dev/null
@@ -1,152 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:  zgscon.c
- * History:     Modified from lapack routines ZGECON.
- */
-#include <math.h>
-#include "slu_zdefs.h"
-zgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
-       double anorm, double *rcond, SuperLUStat_t *stat, int *info)
-    Purpose   
-    =======   
-    ZGSCON estimates the reciprocal of the condition number of a general 
-    real matrix A, in either the 1-norm or the infinity-norm, using   
-    the LU factorization computed by ZGETRF.   
-    An estimate is obtained for norm(inv(A)), and the reciprocal of the   
-    condition number is computed as   
-       RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    NORM    (input) char*
-            Specifies whether the 1-norm condition number or the   
-            infinity-norm condition number is required:   
-            = '1' or 'O':  1-norm;   
-            = 'I':         Infinity-norm.
-    L       (input) SuperMatrix*
-            The factor L from the factorization Pr*A*Pc=L*U as computed by
-            zgstrf(). Use compressed row subscripts storage for supernodes,
-            i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
-    U       (input) SuperMatrix*
-            The factor U from the factorization Pr*A*Pc=L*U as computed by
-            zgstrf(). Use column-wise storage scheme, i.e., U has types:
-            Stype = SLU_NC, Dtype = SLU_Z, Mtype = TRU.
-    ANORM   (input) double
-            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
-            If NORM = 'I', the infinity-norm of the original matrix A.
-    RCOND   (output) double*
-            The reciprocal of the condition number of the matrix A,   
-            computed as RCOND = 1/(norm(A) * norm(inv(A))).
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-    ===================================================================== 
-    /* Local variables */
-    int    kase, kase1, onenrm, i;
-    double ainvnm;
-    doublecomplex *work;
-    extern int zrscl_(int *, doublecomplex *, doublecomplex *, int *);
-    extern int zlacon_(int *, doublecomplex *, doublecomplex *, double *, int *);
-    /* Test the input parameters. */
-    *info = 0;
-    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
-    if (! onenrm && ! lsame_(norm, "I")) *info = -1;
-    else if (L->nrow < 0 || L->nrow != L->ncol ||
-             L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU)
-        *info = -2;
-    else if (U->nrow < 0 || U->nrow != U->ncol ||
-             U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU) 
-       *info = -3;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("zgscon", &i);
-       return;
-    }
-    /* Quick return if possible */
-    *rcond = 0.;
-    if ( L->nrow == 0 || U->nrow == 0) {
-       *rcond = 1.;
-       return;
-    }
-    work = doublecomplexCalloc( 3*L->nrow );
-    if ( !work )
-       ABORT("Malloc fails for work arrays in zgscon.");
-    /* Estimate the norm of inv(A). */
-    ainvnm = 0.;
-    if ( onenrm ) kase1 = 1;
-    else kase1 = 2;
-    kase = 0;
-    do {
-       zlacon_(&L->nrow, &work[L->nrow], &work[0], &ainvnm, &kase);
-       if (kase == 0) break;
-       if (kase == kase1) {
-           /* Multiply by inv(L). */
-           sp_ztrsv("L", "No trans", "Unit", L, U, &work[0], stat, info);
-           /* Multiply by inv(U). */
-           sp_ztrsv("U", "No trans", "Non-unit", L, U, &work[0], stat, info);
-       } else {
-           /* Multiply by inv(U'). */
-           sp_ztrsv("U", "Transpose", "Non-unit", L, U, &work[0], stat, info);
-           /* Multiply by inv(L'). */
-           sp_ztrsv("L", "Transpose", "Unit", L, U, &work[0], stat, info);
-       }
-    } while ( kase != 0 );
-    /* Compute the estimate of the reciprocal condition number. */
-    if (ainvnm != 0.) *rcond = (1. / ainvnm) / anorm;
-    SUPERLU_FREE (work);
-    return;
-} /* zgscon */
diff --git a/superlu/zgsequ.c b/superlu/zgsequ.c
deleted file mode 100644
index 4f9f5207..00000000
--- a/superlu/zgsequ.c
+++ /dev/null
@@ -1,205 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  zgsequ.c
- * History:     Modified from LAPACK routine ZGEEQU
- */
-#include <math.h>
-#include "slu_zdefs.h"
-zgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
-       double *colcnd, double *amax, int *info)
-    Purpose   
-    =======   
-    ZGSEQU computes row and column scalings intended to equilibrate an   
-    M-by-N sparse matrix A and reduce its condition number. R returns the row
-    scale factors and C the column scale factors, chosen to try to make   
-    the largest element in each row and column of the matrix B with   
-    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
-    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
-    number and BIGNUM = largest safe number.  Use of these scaling   
-    factors is not guaranteed to reduce the condition number of A but   
-    works well in practice.   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    A       (input) SuperMatrix*
-            The matrix of dimension (A->nrow, A->ncol) whose equilibration
-            factors are to be computed. The type of A can be:
-            Stype = SLU_NC; Dtype = SLU_Z; Mtype = SLU_GE.
-    R       (output) double*, size A->nrow
-            If INFO = 0 or INFO > M, R contains the row scale factors   
-            for A.
-    C       (output) double*, size A->ncol
-            If INFO = 0,  C contains the column scale factors for A.
-    ROWCND  (output) double*
-            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
-            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
-            AMAX is neither too large nor too small, it is not worth   
-            scaling by R.
-    COLCND  (output) double*
-            If INFO = 0, COLCND contains the ratio of the smallest   
-            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
-            worth scaling by C.
-    AMAX    (output) double*
-            Absolute value of largest matrix element.  If AMAX is very   
-            close to overflow or very close to underflow, the matrix   
-            should be scaled.
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-            > 0:  if INFO = i,  and i is   
-                  <= A->nrow:  the i-th row of A is exactly zero   
-                  >  A->ncol:  the (i-M)-th column of A is exactly zero   
-    ===================================================================== 
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    int i, j, irow;
-    double rcmin, rcmax;
-    double bignum, smlnum;
-    extern double dlamch_(char *);
-    /* Test the input parameters. */
-    *info = 0;
-    if ( A->nrow < 0 || A->ncol < 0 ||
-        A->Stype != SLU_NC || A->Dtype != SLU_Z || A->Mtype != SLU_GE )
-       *info = -1;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("zgsequ", &i);
-       return;
-    }
-    /* Quick return if possible */
-    if ( A->nrow == 0 || A->ncol == 0 ) {
-       *rowcnd = 1.;
-       *colcnd = 1.;
-       *amax = 0.;
-       return;
-    }
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Get machine constants. */
-    smlnum = dlamch_("S");
-    bignum = 1. / smlnum;
-    /* Compute row scale factors. */
-    for (i = 0; i < A->nrow; ++i) r[i] = 0.;
-    /* Find the maximum element in each row. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           r[irow] = SUPERLU_MAX( r[irow], z_abs1(&Aval[i]) );
-       }
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (i = 0; i < A->nrow; ++i) {
-       rcmax = SUPERLU_MAX(rcmax, r[i]);
-       rcmin = SUPERLU_MIN(rcmin, r[i]);
-    }
-    *amax = rcmax;
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (i = 0; i < A->nrow; ++i)
-           if (r[i] == 0.) {
-               *info = i + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (i = 0; i < A->nrow; ++i)
-           r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
-       /* Compute ROWCND = min(R(I)) / max(R(I)) */
-       *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-    /* Compute column scale factors */
-    for (j = 0; j < A->ncol; ++j) c[j] = 0.;
-    /* Find the maximum element in each column, assuming the row
-       scalings computed above. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           c[j] = SUPERLU_MAX( c[j], z_abs1(&Aval[i]) * r[irow] );
-       }
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (j = 0; j < A->ncol; ++j) {
-       rcmax = SUPERLU_MAX(rcmax, c[j]);
-       rcmin = SUPERLU_MIN(rcmin, c[j]);
-    }
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (j = 0; j < A->ncol; ++j)
-           if ( c[j] == 0. ) {
-               *info = A->nrow + j + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (j = 0; j < A->ncol; ++j)
-           c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
-       /* Compute COLCND = min(C(J)) / max(C(J)) */
-       *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-    return;
-} /* zgsequ */
diff --git a/superlu/zgsrfs.c b/superlu/zgsrfs.c
deleted file mode 100644
index e87c3616..00000000
--- a/superlu/zgsrfs.c
+++ /dev/null
@@ -1,456 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:  zgsrfs.c
- * History:     Modified from lapack routine ZGERFS
- */
-#include <math.h>
-#include "slu_zdefs.h"
-zgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
-       int *perm_c, int *perm_r, char *equed, double *R, double *C,
-       SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr,
-       SuperLUStat_t *stat, int *info)
- *   Purpose   
- *   =======   
- *
- *   ZGSRFS improves the computed solution to a system of linear   
- *   equations and provides error bounds and backward error estimates for 
- *   the solution.   
- *
- *   If equilibration was performed, the system becomes:
- *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- *   Arguments   
- *   =========   
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *   
- *   A       (input) SuperMatrix*
- *           The original matrix A in the system, or the scaled A if
- *           equilibration was done. The type of A can be:
- *           Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_GE.
- *    
- *   L       (input) SuperMatrix*
- *          The factor L from the factorization Pr*A*Pc=L*U. Use
- *           compressed row subscripts storage for supernodes, 
- *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- * 
- *   U       (input) SuperMatrix*
- *           The factor U from the factorization Pr*A*Pc=L*U as computed by
- *           zgstrf(). Use column-wise storage scheme, 
- *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- *   perm_c  (input) int*, dimension (A->ncol)
- *          Column permutation vector, which defines the 
- *           permutation matrix Pc; perm_c[i] = j means column i of A is 
- *           in position j in A*Pc.
- *
- *   perm_r  (input) int*, dimension (A->nrow)
- *           Row permutation vector, which defines the permutation matrix Pr;
- *           perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- *   equed   (input) Specifies the form of equilibration that was done.
- *           = 'N': No equilibration.
- *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *           = 'C': Column equilibration, i.e., A was postmultiplied by
- *                  diag(C).
- *           = 'B': Both row and column equilibration, i.e., A was replaced 
- *                  by diag(R)*A*diag(C).
- *
- *   R       (input) double*, dimension (A->nrow)
- *           The row scale factors for A.
- *           If equed = 'R' or 'B', A is premultiplied by diag(R).
- *           If equed = 'N' or 'C', R is not accessed.
- * 
- *   C       (input) double*, dimension (A->ncol)
- *           The column scale factors for A.
- *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
- *           If equed = 'N' or 'R', C is not accessed.
- *
- *   B       (input) SuperMatrix*
- *           B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *           The right hand side matrix B.
- *           if equed = 'R' or 'B', B is premultiplied by diag(R).
- *
- *   X       (input/output) SuperMatrix*
- *           X has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *           On entry, the solution matrix X, as computed by zgstrs().
- *           On exit, the improved solution matrix X.
- *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
- *               in order to obtain the solution to the original system.
- *
- *   FERR    (output) double*, dimension (B->ncol)   
- *           The estimated forward error bound for each solution vector   
- *           X(j) (the j-th column of the solution matrix X).   
- *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *           is an estimated upper bound for the magnitude of the largest 
- *           element in (X(j) - XTRUE) divided by the magnitude of the   
- *           largest element in X(j).  The estimate is as reliable as   
- *           the estimate for RCOND, and is almost always a slight   
- *           overestimate of the true error.
- *
- *   BERR    (output) double*, dimension (B->ncol)   
- *           The componentwise relative backward error of each solution   
- *           vector X(j) (i.e., the smallest relative change in   
- *           any element of A or B that makes X(j) an exact solution).
- *
- *   stat     (output) SuperLUStat_t*
- *            Record the statistics on runtime and floating-point operation count.
- *            See util.h for the definition of 'SuperLUStat_t'.
- *
- *   info    (output) int*   
- *           = 0:  successful exit   
- *            < 0:  if INFO = -i, the i-th argument had an illegal value   
- *
- *    Internal Parameters   
- *    ===================   
- *
- *    ITMAX is the maximum number of steps of iterative refinement.   
- *
- */  
-#define ITMAX 5
-    /* Table of constant values */
-    int    ione = 1;
-    doublecomplex ndone = {-1., 0.};
-    doublecomplex done = {1., 0.};
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    SuperMatrix Bjcol;
-    DNformat *Bstore, *Xstore, *Bjcol_store;
-    doublecomplex   *Bmat, *Xmat, *Bptr, *Xptr;
-    int      kase;
-    double   safe1, safe2;
-    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
-    int      ldb, ldx, nrhs;
-    double   s, xk, lstres, eps, safmin;
-    char     transc[1];
-    trans_t  transt;
-    doublecomplex   *work;
-    double   *rwork;
-    int      *iwork;
-    extern double dlamch_(char *);
-    extern int zlacon_(int *, doublecomplex *, doublecomplex *, double *, int *);
-#ifdef _CRAY
-    extern int CCOPY(int *, doublecomplex *, int *, doublecomplex *, int *);
-    extern int CSAXPY(int *, doublecomplex *, doublecomplex *, int *, doublecomplex *, int *);
-    extern int zcopy_(int *, doublecomplex *, int *, doublecomplex *, int *);
-    extern int zaxpy_(int *, doublecomplex *, doublecomplex *, int *, doublecomplex *, int *);
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    /* Test the input parameters */
-    *info = 0;
-    notran = (trans == NOTRANS);
-    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             A->Stype != SLU_NC || A->Dtype != SLU_Z || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU )
-       *info = -3;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU )
-       *info = -4;
-    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
-        *info = -10;
-    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
-             X->Stype != SLU_DN || X->Dtype != SLU_Z || X->Mtype != SLU_GE )
-       *info = -11;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("zgsrfs", &i);
-       return;
-    }
-    /* Quick return if possible */
-    if ( A->nrow == 0 || nrhs == 0) {
-       for (j = 0; j < nrhs; ++j) {
-           ferr[j] = 0.;
-           berr[j] = 0.;
-       }
-       return;
-    }
-    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-    colequ = lsame_(equed, "C") || lsame_(equed, "B");
-    /* Allocate working space */
-    work = doublecomplexMalloc(2*A->nrow);
-    rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) );
-    iwork = intMalloc(A->nrow);
-    if ( !work || !rwork || !iwork ) 
-        ABORT("Malloc fails for work/rwork/iwork.");
-    if ( notran ) {
-       *(unsigned char *)transc = 'N';
-        transt = TRANS;
-    } else {
-       *(unsigned char *)transc = 'T';
-       transt = NOTRANS;
-    }
-    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
-    nz     = A->ncol + 1;
-    eps    = dlamch_("Epsilon");
-    safmin = dlamch_("Safe minimum");
-    safe1  = nz * safmin;
-    safe2  = safe1 / eps;
-    /* Compute the number of nonzeros in each row (or column) of A */
-    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
-    if ( notran ) {
-       for (k = 0; k < A->ncol; ++k)
-           for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
-               ++iwork[Astore->rowind[i]];
-    } else {
-       for (k = 0; k < A->ncol; ++k)
-           iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
-    }  
-    /* Copy one column of RHS B into Bjcol. */
-    Bjcol.Stype = B->Stype;
-    Bjcol.Dtype = B->Dtype;
-    Bjcol.Mtype = B->Mtype;
-    Bjcol.nrow  = B->nrow;
-    Bjcol.ncol  = 1;
-    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
-    Bjcol_store = Bjcol.Store;
-    Bjcol_store->lda = ldb;
-    Bjcol_store->nzval = work; /* address aliasing */
-    /* Do for each right hand side ... */
-    for (j = 0; j < nrhs; ++j) {
-       count = 0;
-       lstres = 3.;
-       Bptr = &Bmat[j*ldb];
-       Xptr = &Xmat[j*ldx];
-       while (1) { /* Loop until stopping criterion is satisfied. */
-           /* Compute residual R = B - op(A) * X,   
-              where op(A) = A, A**T, or A**H, depending on TRANS. */
-#ifdef _CRAY
-           CCOPY(&A->nrow, Bptr, &ione, work, &ione);
-           zcopy_(&A->nrow, Bptr, &ione, work, &ione);
-           sp_zgemv(transc, ndone, A, Xptr, ione, done, work, ione);
-           /* Compute componentwise relative backward error from formula 
-              max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
-              where abs(Z) is the componentwise absolute value of the matrix
-              or vector Z.  If the i-th component of the denominator is less
-              than SAFE2, then SAFE1 is added to the i-th component of the   
-              numerator and denominator before dividing. */
-           for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );
-           /* Compute abs(op(A))*abs(X) + abs(B). */
-           if (notran) {
-               for (k = 0; k < A->ncol; ++k) {
-                   xk = z_abs1( &Xptr[k] );
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                       rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;
-               }
-           } else {
-               for (k = 0; k < A->ncol; ++k) {
-                   s = 0.;
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       s += z_abs1(&Aval[i]) * z_abs1(&Xptr[irow]);
-                   }
-                   rwork[k] += s;
-               }
-           }
-           s = 0.;
-           for (i = 0; i < A->nrow; ++i) {
-               if (rwork[i] > safe2)
-                   s = SUPERLU_MAX( s, z_abs1(&work[i]) / rwork[i] );
-               else
-                   s = SUPERLU_MAX( s, (z_abs1(&work[i]) + safe1) / 
-                               (rwork[i] + safe1) );
-           }
-           berr[j] = s;
-           /* Test stopping criterion. Continue iterating if   
-              1) The residual BERR(J) is larger than machine epsilon, and   
-              2) BERR(J) decreased by at least a factor of 2 during the   
-                 last iteration, and   
-              3) At most ITMAX iterations tried. */
-           if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
-               /* Update solution and try again. */
-               zgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-#ifdef _CRAY
-               CAXPY(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-               zaxpy_(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-               lstres = berr[j];
-               ++count;
-           } else {
-               break;
-           }
-       } /* end while */
-       stat->RefineSteps = count;
-       /* Bound error from formula:
-          norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
-          ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
-          where   
-            norm(Z) is the magnitude of the largest component of Z   
-            inv(op(A)) is the inverse of op(A)   
-            abs(Z) is the componentwise absolute value of the matrix or
-              vector Z   
-            NZ is the maximum number of nonzeros in any row of A, plus 1   
-            EPS is machine epsilon   
-          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
-          is incremented by SAFE1 if the i-th component of   
-          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   
-          Use ZLACON to estimate the infinity-norm of the matrix   
-             inv(op(A)) * diag(W),   
-          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
-       for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );
-       /* Compute abs(op(A))*abs(X) + abs(B). */
-       if ( notran ) {
-           for (k = 0; k < A->ncol; ++k) {
-               xk = z_abs1( &Xptr[k] );
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                   rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;
-           }
-       } else {
-           for (k = 0; k < A->ncol; ++k) {
-               s = 0.;
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   xk = z_abs1( &Xptr[irow] );
-                   s += z_abs1(&Aval[i]) * xk;
-               }
-               rwork[k] += s;
-           }
-       }
-       for (i = 0; i < A->nrow; ++i)
-           if (rwork[i] > safe2)
-               rwork[i] = z_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
-           else
-               rwork[i] = z_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
-       kase = 0;
-       do {
-           zlacon_(&A->nrow, &work[A->nrow], work,
-                   &ferr[j], &kase);
-           if (kase == 0) break;
-           if (kase == 1) {
-               /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       zd_mult(&work[i], &work[i], C[i]);
-                   }
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->nrow; ++i) {
-                       zd_mult(&work[i], &work[i], R[i]);
-                    }
-               zgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               for (i = 0; i < A->nrow; ++i) {
-                   zd_mult(&work[i], &work[i], rwork[i]);
-               }
-           } else {
-               /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
-               for (i = 0; i < A->nrow; ++i) {
-                   zd_mult(&work[i], &work[i], rwork[i]);
-               }
-               zgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       zd_mult(&work[i], &work[i], C[i]);
-                   }
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       zd_mult(&work[i], &work[i], R[i]);  
-                   }
-           }
-       } while ( kase != 0 );
-       /* Normalize error. */
-       lstres = 0.;
-       if ( notran && colequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, C[i] * z_abs1( &Xptr[i]) );
-       } else if ( !notran && rowequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, R[i] * z_abs1( &Xptr[i]) );
-       } else {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, z_abs1( &Xptr[i]) );
-       }
-       if ( lstres != 0. )
-           ferr[j] /= lstres;
-    } /* for each RHS j ... */
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(rwork);
-    SUPERLU_FREE(iwork);
-    SUPERLU_FREE(Bjcol.Store);
-    return;
-} /* zgsrfs */
diff --git a/superlu/zgssv.c b/superlu/zgssv.c
deleted file mode 100644
index 73bf9a86..00000000
--- a/superlu/zgssv.c
+++ /dev/null
@@ -1,230 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_zdefs.h"
-zgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-      SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
-      SuperLUStat_t *stat, int *info )
- * Purpose
- * =======
- *
- * ZGSSV solves the system of linear equations A*X=B, using the
- * LU factorization from ZGSTRF. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *
- *      1.1. Permute the columns of A, forming A*Pc, where Pc
- *           is a permutation matrix. For more details of this step, 
- *           see sp_preorder.c.
- *
- *      1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
- *           by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      1.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the
- *      above algorithm to the transpose of A:
- *
- *      2.1. Permute columns of transpose(A) (rows of A),
- *           forming transpose(A)*Pc, where Pc is a permutation matrix. 
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
- *           determined by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      2.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- * 
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR; Dtype = SLU_Z; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *         If A->Stype = SLU_NC, column permutation vector of size A->ncol
- *         which defines the permutation matrix Pc; perm_c[i] = j means 
- *         column i of A is in position j in A*Pc.
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- *         If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
- *            options->Fact = SamePattern_SameRowPerm, it is an input argument.
- *            On exit, perm_c may be overwritten by the product of the input
- *            perm_c and a permutation that postorders the elimination tree
- *            of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *            is already in postorder.
- *         Otherwise, it is an output argument.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined 
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->RowPerm = MY_PERMR or
- *            options->Fact = SamePattern_SameRowPerm, perm_r is an
- *            input argument.
- *         otherwise it is an output argument.
- *
- * L       (output) SuperMatrix*
- *         The factor L from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *         
- * U       (output) SuperMatrix*
- *        The factor U from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *         > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                so the solution could not be computed.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol.
- *   
- */
-    DNformat *Bstore;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int      lwork = 0, *etree, i;
-    /* Set default values for some parameters */
-    double   drop_tol = 0.;
-    int      panel_size;     /* panel size */
-    int      relax;          /* no of columns in a relaxed snodes */
-    int      permc_spec;
-    trans_t  trans = NOTRANS;
-    double   *utime;
-    double   t;        /* Temporary time */
-    /* Test the input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    if ( options->Fact != DOFACT ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-        (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-        A->Dtype != SLU_Z || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-       B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
-       *info = -7;
-    if ( *info != 0 ) {
-       i = -(*info);
-       xerbla_("zgssv", &i);
-       return;
-    }
-    utime = stat->utime;
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       zCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       trans = TRANS;
-    } else {
-        if ( A->Stype == SLU_NC ) AA = A;
-    }
-    t = SuperLU_timer_();
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = NATURAL:  natural ordering 
-     *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-     *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-     *   permc_spec = COLAMD:   approximate minimum degree column ordering
-     *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-     */
-    permc_spec = options->ColPerm;
-    if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-      get_perm_c(permc_spec, AA, perm_c);
-    utime[COLPERM] = SuperLU_timer_() - t;
-    etree = intMalloc(A->ncol);
-    t = SuperLU_timer_();
-    sp_preorder(options, AA, perm_c, etree, &AC);
-    utime[ETREE] = SuperLU_timer_() - t;
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-    /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-         relax, panel_size, sp_ienv(3), sp_ienv(4));*/
-    t = SuperLU_timer_(); 
-    /* Compute the LU factorization of A. */
-    zgstrf(options, &AC, drop_tol, relax, panel_size,
-          etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
-    utime[FACT] = SuperLU_timer_() - t;
-    t = SuperLU_timer_();
-    if ( *info == 0 ) {
-        /* Solve the system A*X=B, overwriting B with X. */
-        zgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
-    }
-    utime[SOLVE] = SuperLU_timer_() - t;
-    SUPERLU_FREE (etree);
-    Destroy_CompCol_Permuted(&AC);
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-    }
diff --git a/superlu/zgssvx.c b/superlu/zgssvx.c
deleted file mode 100644
index 09979b5e..00000000
--- a/superlu/zgssvx.c
+++ /dev/null
@@ -1,623 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_zdefs.h"
-zgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-       int *etree, char *equed, double *R, double *C,
-       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
-       SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, 
-       double *rcond, double *ferr, double *berr, 
-       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
- * Purpose
- * =======
- *
- * ZGSSVX solves the system of linear equations A*X=B or A'*X=B, using
- * the LU factorization from zgstrf(). Error bounds on the solution and
- * a condition estimate are also provided. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *  
- *      1.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A is
- *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
- *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
- *           = TRANS or CONJ).
- *
- *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
- *           matrix that usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the matrix A (after equilibration if options->Equil = YES)
- *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
- *
- *      1.4. Compute the reciprocal pivot growth factor.
- *
- *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form of 
- *           A is used to estimate the condition number of the matrix A. If
- *           the reciprocal of the condition number is less than machine
- *           precision, info = A->ncol+1 is returned as a warning, but the
- *           routine still goes on to solve for X and computes error bounds
- *           as described below.
- *
- *      1.6. The system of equations is solved for X using the factored form
- *           of A.
- *
- *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      1.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R)
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
- *      to the transpose of A:
- *
- *      2.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A' is
- *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
- *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
- *
- *      2.2. Permute columns of transpose(A) (rows of A), 
- *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
- *           usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the transpose(A) (after equilibration if 
- *           options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
- *           permutation Pr determined by partial pivoting.
- *
- *      2.4. Compute the reciprocal pivot growth factor.
- *
- *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form 
- *           of transpose(A) is used to estimate the condition number of the
- *           matrix A. If the reciprocal of the condition number
- *           is less than machine precision, info = A->nrow+1 is returned as
- *           a warning, but the routine still goes on to solve for X and
- *           computes error bounds as described below.
- *
- *      2.6. The system of equations is solved for X using the factored form
- *           of transpose(A).
- *
- *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      2.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input/output) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- *         On entry, If options->Fact = FACTORED and equed is not 'N', 
- *         then A must have been equilibrated by the scaling factors in
- *         R and/or C.  
- *         On exit, A is not modified if options->Equil = NO, or if 
- *         options->Equil = YES but equed = 'N' on exit.
- *         Otherwise, if options->Equil = YES and equed is not 'N',
- *         A is scaled as follows:
- *         If A->Stype = SLU_NC:
- *           equed = 'R':  A := diag(R) * A
- *           equed = 'C':  A := A * diag(C)
- *           equed = 'B':  A := diag(R) * A * diag(C).
- *         If A->Stype = SLU_NR:
- *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
- *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
- *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
- *
- * perm_c  (input/output) int*
- *        If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
- *         which defines the permutation matrix Pc; perm_c[i] = j means
- *         column i of A is in position j in A*Pc.
- *         On exit, perm_c may be overwritten by the product of the input
- *         perm_c and a permutation that postorders the elimination tree
- *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *         is already in postorder.
- *
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *         will try to use the input perm_r, unless a certain threshold
- *         criterion is violated. In that case, perm_r is overwritten by a
- *         new permutation determined by partial pivoting or diagonal
- *         threshold pivoting.
- *         Otherwise, perm_r is output argument.
- * 
- * etree   (input/output) int*,  dimension (A->ncol)
- *         Elimination tree of Pc'*A'*A*Pc.
- *         If options->Fact != FACTORED and options->Fact != DOFACT,
- *         etree is an input argument, otherwise it is an output argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * equed   (input/output) char*
- *         Specifies the form of equilibration that was done.
- *         = 'N': No equilibration.
- *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
- *         = 'B': Both row and column equilibration, i.e., A was replaced 
- *                by diag(R)*A*diag(C).
- *         If options->Fact = FACTORED, equed is an input argument,
- *         otherwise it is an output argument.
- *
- * R       (input/output) double*, dimension (A->nrow)
- *         The row scale factors for A or transpose(A).
- *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
- *         If equed = 'N' or 'C', R is not accessed.
- *         If options->Fact = FACTORED, R is an input argument,
- *             otherwise, R is output.
- *         If options->zFact = FACTORED and equed = 'R' or 'B', each element
- *             of R must be positive.
- * 
- * C       (input/output) double*, dimension (A->ncol)
- *         The column scale factors for A or transpose(A).
- *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
- *         If equed = 'N' or 'R', C is not accessed.
- *         If options->Fact = FACTORED, C is an input argument,
- *             otherwise, C is output.
- *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
- *             of C must be positive.
- *         
- * L       (output) SuperMatrix*
- *        The factor L from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype SLU_= NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *
- * U       (output) SuperMatrix*
- *        The factor U from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * work    (workspace/output) void*, size (lwork) (in bytes)
- *         User supplied workspace, should be large enough
- *         to hold data structures for factors L and U.
- *         On exit, if fact is not 'F', L and U point to this array.
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               mem_usage->total_needed; no other side effects.
- *
- *         See argument 'mem_usage' for memory usage statistics.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         If B->ncol = 0, only LU decomposition is performed, the triangular
- *                         solve is skipped.
- *         On exit,
- *            if equed = 'N', B is not modified; otherwise
- *            if A->Stype = SLU_NC:
- *               if options->Trans = NOTRANS and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *            if A->Stype = SLU_NR:
- *               if options->Trans = NOTRANS and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B.
- *
- * X       (output) SuperMatrix*
- *         X has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE. 
- *         If info = 0 or info = A->ncol+1, X contains the solution matrix
- *         to the original system of equations. Note that A and B are modified
- *         on exit if equed is not 'N', and the solution to the equilibrated
- *         system is inv(diag(C))*X if options->Trans = NOTRANS and
- *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
- *         and equed = 'R' or 'B'.
- *
- * recip_pivot_growth (output) double*
- *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
- *         The infinity norm is used. If recip_pivot_growth is much less
- *         than 1, the stability of the LU factorization could be poor.
- *
- * rcond   (output) double*
- *         The estimate of the reciprocal condition number of the matrix A
- *         after equilibration (if done). If rcond is less than the machine
- *         precision (in particular, if rcond = 0), the matrix is singular
- *         to working precision. This condition is indicated by a return
- *         code of info > 0.
- *
- * FERR    (output) double*, dimension (B->ncol)   
- *         The estimated forward error bound for each solution vector   
- *         X(j) (the j-th column of the solution matrix X).   
- *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *         is an estimated upper bound for the magnitude of the largest 
- *         element in (X(j) - XTRUE) divided by the magnitude of the   
- *         largest element in X(j).  The estimate is as reliable as   
- *         the estimate for RCOND, and is almost always a slight   
- *         overestimate of the true error.
- *         If options->IterRefine = NOREFINE, ferr = 1.0.
- *
- * BERR    (output) double*, dimension (B->ncol)
- *         The componentwise relative backward error of each solution   
- *         vector X(j) (i.e., the smallest relative change in   
- *         any element of A or B that makes X(j) an exact solution).
- *         If options->IterRefine = NOREFINE, berr = 1.0.
- *
- * mem_usage (output) mem_usage_t*
- *         Record the memory usage statistics, consisting of following fields:
- *         - for_lu (float)
- *           The amount of space used in bytes for L\U data structures.
- *         - total_needed (float)
- *           The amount of space needed in bytes to perform factorization.
- *         - expansions (int)
- *           The number of memory expansions during the LU factorization.
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *         = 0: successful exit   
- *         < 0: if info = -i, the i-th argument had an illegal value   
- *         > 0: if info = i, and i is   
- *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
- *                    been completed, but the factor U is exactly   
- *                    singular, so the solution and error bounds   
- *                    could not be computed.   
- *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
- *                    precision, meaning that the matrix is singular to
- *                    working precision. Nevertheless, the solution and
- *                    error bounds are computed because there are a number
- *                    of situations where the computed solution can be more
- *                    accurate than the value of RCOND would suggest.   
- *              > A->ncol+1: number of bytes allocated when memory allocation
- *                    failure occurred, plus A->ncol.
- *
- */
-    DNformat  *Bstore, *Xstore;
-    doublecomplex    *Bmat, *Xmat;
-    int       ldb, ldx, nrhs;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int       colequ, equil, nofact, notran, rowequ, permc_spec;
-    trans_t   trant;
-    char      norm[1];
-    int       i, j, info1;
-    double    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
-    int       relax, panel_size;
-    double    diag_pivot_thresh, drop_tol;
-    double    t0;      /* temporary time */
-    double    *utime;
-    /* External functions */
-    extern double zlangs(char *, SuperMatrix *);
-    extern double dlamch_(char *);
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    *info = 0;
-    nofact = (options->Fact != FACTORED);
-    equil = (options->Equil == YES);
-    notran = (options->Trans == NOTRANS);
-    if ( nofact ) {
-       *(unsigned char *)equed = 'N';
-       rowequ = FALSE;
-       colequ = FALSE;
-    } else {
-       rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-       colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       smlnum = dlamch_("Safe minimum");
-       bignum = 1. / smlnum;
-    }
-#if 0
-printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
-       options->Fact, options->Trans, *equed);
-    /* Test the input parameters */
-    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
-       options->Fact != SamePattern_SameRowPerm &&
-       !notran && options->Trans != TRANS && options->Trans != CONJ &&
-       !equil && options->Equil != NO)
-       *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-             A->Dtype != SLU_Z || A->Mtype != SLU_GE )
-       *info = -2;
-    else if (options->Fact == FACTORED &&
-            !(rowequ || colequ || lsame_(equed, "N")))
-       *info = -6;
-    else {
-       if (rowequ) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, R[j]);
-               rcmax = SUPERLU_MAX(rcmax, R[j]);
-           }
-           if (rcmin <= 0.) *info = -7;
-           else if ( A->nrow > 0)
-               rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else rowcnd = 1.;
-       }
-       if (colequ && *info == 0) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, C[j]);
-               rcmax = SUPERLU_MAX(rcmax, C[j]);
-           }
-           if (rcmin <= 0.) *info = -8;
-           else if (A->nrow > 0)
-               colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else colcnd = 1.;
-       }
-       if (*info == 0) {
-           if ( lwork < -1 ) *info = -12;
-           else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     B->Stype != SLU_DN || B->Dtype != SLU_Z || 
-                     B->Mtype != SLU_GE )
-               *info = -13;
-           else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     (B->ncol != 0 && B->ncol != X->ncol) ||
-                      X->Stype != SLU_DN ||
-                     X->Dtype != SLU_Z || X->Mtype != SLU_GE )
-               *info = -14;
-       }
-    }
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("zgssvx", &i);
-       return;
-    }
-    /* Initialization for factor parameters */
-    panel_size = sp_ienv(1);
-    relax      = sp_ienv(2);
-    diag_pivot_thresh = options->DiagPivotThresh;
-    drop_tol   = 0.0;
-    utime = stat->utime;
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       zCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       if ( notran ) { /* Reverse the transpose argument. */
-           trant = TRANS;
-           notran = 0;
-       } else {
-           trant = NOTRANS;
-           notran = 1;
-       }
-    } else { /* A->Stype == SLU_NC */
-       trant = options->Trans;
-       AA = A;
-    }
-    if ( nofact && equil ) {
-       t0 = SuperLU_timer_();
-       /* Compute row and column scalings to equilibrate the matrix A. */
-       zgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
-       if ( info1 == 0 ) {
-           /* Equilibrate matrix A. */
-           zlaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
-           rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-           colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       }
-       utime[EQUIL] = SuperLU_timer_() - t0;
-    }
-    if ( nrhs > 0 ) {
-        /* Scale the right hand side if equilibration was performed. */
-        if ( notran ) {
-           if ( rowequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        zd_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
-                   }
-           }
-        } else if ( colequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                    zd_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
-               }
-        }
-    }
-    if ( nofact ) {
-        t0 = SuperLU_timer_();
-       /*
-        * Get column permutation vector perm_c[], according to permc_spec:
-        *   permc_spec = NATURAL:  natural ordering 
-        *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-        *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-        *   permc_spec = COLAMD:   approximate minimum degree column ordering
-        *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-        */
-       permc_spec = options->ColPerm;
-       if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-            get_perm_c(permc_spec, AA, perm_c);
-       utime[COLPERM] = SuperLU_timer_() - t0;
-       t0 = SuperLU_timer_();
-       sp_preorder(options, AA, perm_c, etree, &AC);
-       utime[ETREE] = SuperLU_timer_() - t0;
-/*     printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-              relax, panel_size, sp_ienv(3), sp_ienv(4));
-       fflush(stdout); */
-       /* Compute the LU factorization of A*Pc. */
-       t0 = SuperLU_timer_();
-       zgstrf(options, &AC, drop_tol, relax, panel_size,
-              etree, work, lwork, perm_c, perm_r, L, U, stat, info);
-       utime[FACT] = SuperLU_timer_() - t0;
-       if ( lwork == -1 ) {
-           mem_usage->total_needed = *info - A->ncol;
-           return;
-       }
-    }
-    if ( options->PivotGrowth ) {
-        if ( *info > 0 ) {
-           if ( *info <= A->ncol ) {
-               /* Compute the reciprocal pivot growth factor of the leading
-                  rank-deficient *info columns of A. */
-               *recip_pivot_growth = zPivotGrowth(*info, AA, perm_c, L, U);
-           }
-           return;
-        }
-        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
-        *recip_pivot_growth = zPivotGrowth(A->ncol, AA, perm_c, L, U);
-    }
-    if ( options->ConditionNumber ) {
-        /* Estimate the reciprocal of the condition number of A. */
-        t0 = SuperLU_timer_();
-        if ( notran ) {
-           *(unsigned char *)norm = '1';
-        } else {
-           *(unsigned char *)norm = 'I';
-        }
-        anorm = zlangs(norm, AA);
-        zgscon(norm, L, U, anorm, rcond, stat, info);
-        utime[RCOND] = SuperLU_timer_() - t0;
-    }
-    if ( nrhs > 0 ) {
-        /* Compute the solution matrix X. */
-        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
-            for (i = 0; i < B->nrow; i++)
-               Xmat[i + j*ldx] = Bmat[i + j*ldb];
-        t0 = SuperLU_timer_();
-        zgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
-        utime[SOLVE] = SuperLU_timer_() - t0;
-        /* Use iterative refinement to improve the computed solution and 
-           error bounds and backward error estimates for it. */
-        t0 = SuperLU_timer_();
-        if ( options->IterRefine != NOREFINE ) {
-            zgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
-                   X, ferr, berr, stat, info);
-        } else {
-            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
-        }
-        utime[REFINE] = SuperLU_timer_() - t0;
-        /* Transform the solution matrix X to a solution of the original system. */
-        if ( notran ) {
-           if ( colequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        zd_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]);
-                   }
-           }
-        } else if ( rowequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                    zd_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]);
-                }
-        }
-    } /* end if nrhs > 0 */
-    if ( options->ConditionNumber ) {
-        /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */
-        if ( *rcond < dlamch_("E") ) *info = A->ncol + 1;
-    }
-    if ( nofact ) {
-        zQuerySpace(L, U, mem_usage);
-        Destroy_CompCol_Permuted(&AC);
-    }
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-    }
diff --git a/superlu/zgstrf.c b/superlu/zgstrf.c
deleted file mode 100644
index 1aaec38a..00000000
--- a/superlu/zgstrf.c
+++ /dev/null
@@ -1,432 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_zdefs.h"
-extern void countnz();
-extern void fixupL();
-zgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
-        int relax, int panel_size, int *etree, void *work, int lwork,
-        int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
-        SuperLUStat_t *stat, int *info)
- * Purpose
- * =======
- *
- * ZGSTRF computes an LU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE.
- *
- * drop_tol (input) double (NOT IMPLEMENTED)
- *         Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
- * relax    (input) int
- *          To control degree of relaxing supernodes. If the number
- *          of nodes (columns) in a subtree of the elimination tree is less
- *          than relax, this subtree is considered as one supernode,
- *          regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *          A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *          Elimination tree of A'*A.
- *          Note: etree is a vector of parent pointers for a forest whose
- *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *          On input, the columns of A should be permuted so that the
- *          etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *          User-supplied work space and space for the output data structures.
- *          Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *         Column permutation vector, which defines the 
- *          permutation matrix Pc; perm_c[i] = j means column i of A is 
- *          in position j in A*Pc.
- *          When searching for diagonal, perm_c[*] is applied to the
- *          row subscripts of A, so that diagonal threshold pivoting
- *          can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *          Row permutation vector which defines the permutation matrix Pr,
- *          perm_r[i] = j means row i of A is in position j in Pr*A.
- *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *             will try to use the input perm_r, unless a certain threshold
- *             criterion is violated. In that case, perm_r is overwritten by
- *             a new permutation determined by partial pivoting or diagonal
- *             threshold pivoting.
- *          Otherwise, perm_r is output argument;
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = SLU_NC, 
- *          Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *          = 0: successful exit
- *          < 0: if info = -i, the i-th argument had an illegal value
- *          > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                and division by zero will occur if it is used to solve a
- *                system of equations.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol. If lwork = -1, it is
- *                the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays: 
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   xprune[0:n-1]: xprune[*] points to locations in subscript 
- *     vector lsub[*]. For column i, xprune[i] denotes the point where 
- *     structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
- *     to be traversed for symbolic factorization.
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been 
- *     reached when working on column j.
- *     Storage: relative to original row subscripts
- *     NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
- *           see zpanel_dfs.c; marker2 is used for inner-factorization,
- *            see zcolumn_dfs.c.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *      Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
- *     unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *     in topological order of the dfs. A supernode representative is the 
- *     last column of a supernode.
- *      The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
- *     supernodal representative r, repfnz[r] is the location of the first 
- *     nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *     indicates the supernode r has been explored.
- *     NOTE: There are W of them, each used for one column of a panel. 
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
- *      the panel diagonal. These are filled in during zpanel_dfs(), and are
- *      used later in the inner LU factorization within the panel.
- *     panel_lsub[]/dense[] pair forms the SPA data structure.
- *     NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *                NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *     The size of this array is defined by NUM_TEMPV() in zsp_defs.h.
- *
- */
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when 
-                                  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *iwork;
-    doublecomplex    *zwork;
-    int              *segrep, *repfnz, *parent, *xplore;
-    int              *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
-    int              *xprune;
-    int              *marker;
-    doublecomplex    *dense, *tempv;
-    int       *relax_end;
-    doublecomplex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    int       pivrow;   /* pivotal row number in the original matrix A */
-    int       nseg1;   /* no of segments in U-column above panel row jcol */
-    int       nseg;    /* no of segments in each U-column */
-    register int jcol; 
-    register int kcol; /* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;   /* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-    iinfo    = 0;
-    m        = A->nrow;
-    n        = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a        = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-    /* Allocate storage common to the factor routines */
-    *info = zLUMemInit(fact, work, lwork, m, n, Astore->nnz,
-                       panel_size, L, U, &Glu, &iwork, &zwork);
-    if ( *info ) return;
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-            &repfnz, &panel_lsub, &xprune, &marker);
-    zSetRWork(m, panel_size, zwork, &dense, &tempv);
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-       /* Compute the inverse of perm_r */
-       iperm_r = (int *) intMalloc(m);
-       for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-       iperm_r_allocated = 1;
-    }
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES ) {
-        heap_relax_snode(n, etree, relax, marker, relax_end); 
-    } else {
-        relax_snode(n, etree, relax, marker, relax_end); 
-    }
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-    /* 
-     * Work on one "panel" at a time. A panel is one of the following: 
-     *    (a) a relaxed supernode at the bottom of the etree, or
-     *    (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-       if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-           kcol = relax_end[jcol];       /* end of the relaxed snode */
-           panel_histo[kcol-jcol+1]++;
-           /* --------------------------------------
-            * Factorize the relaxed supernode(jcol:kcol) 
-            * -------------------------------------- */
-           /* Determine the union of the row structure of the snode */
-           if ( (*info = zsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-                                   xprune, marker, &Glu)) != 0 )
-               return;
-            nextu    = xusub[jcol];
-           nextlu   = xlusup[jcol];
-           jsupno   = supno[jcol];
-           fsupc    = xsup[jsupno];
-           new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-           nzlumax = Glu.nzlumax;
-           while ( new_next > nzlumax ) {
-               if ( (*info = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) 
-                   return;
-           }
-           for (icol = jcol; icol<= kcol; icol++) {
-               xusub[icol+1] = nextu;
-               /* Scatter into SPA dense[*] */
-               for (k = xa_begin[icol]; k < xa_end[icol]; k++)
-                   dense[asub[k]] = a[k];
-               /* Numeric update within the snode */
-               zsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-               if ( (*info = zpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-#ifdef DEBUG
-               zprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
-           }
-           jcol = icol;
-       } else { /* Work on one panel of panel_size columns */
-           /* Adjust panel_size so that a panel won't overlap with the next 
-            * relaxed snode.
-            */
-           panel_size = w_def;
-           for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
-               if ( relax_end[k] != EMPTY ) {
-                   panel_size = k - jcol;
-                   break;
-               }
-           if ( k == min_mn ) panel_size = min_mn - jcol;
-           panel_histo[panel_size]++;
-           /* symbolic factor on a panel of columns */
-           zpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                     dense, panel_lsub, segrep, repfnz, xprune,
-                     marker, parent, xplore, &Glu);
-           /* numeric sup-panel updates in topological order */
-           zpanel_bmod(m, panel_size, jcol, nseg1, dense,
-                       tempv, segrep, repfnz, &Glu, stat);
-           /* Sparse LU within the panel, and below panel diagonal */
-           for ( jj = jcol; jj < jcol + panel_size; jj++) {
-               k = (jj - jcol) * m; /* column index for w-wide arrays */
-               nseg = nseg1;   /* Begin after all the panel segments */
-               if ((*info = zcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
-                                       segrep, &repfnz[k], xprune, marker,
-                                       parent, xplore, &Glu)) != 0) return;
-               /* Numeric updates */
-               if ((*info = zcolumn_bmod(jj, (nseg - nseg1), &dense[k],
-                                        tempv, &segrep[nseg1], &repfnz[k],
-                                        jcol, &Glu, stat)) != 0) return;
-               /* Copy the U-segments to ucol[*] */
-               if ((*info = zcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-                                         perm_r, &dense[k], &Glu)) != 0)
-                   return;
-               if ( (*info = zpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-               /* Prune columns (0:jj-1) using column jj */
-               zpruneL(jj, perm_r, pivrow, nseg, segrep,
-                        &repfnz[k], xprune, &Glu);
-               /* Reset repfnz[] for this column */
-               resetrep_col (nseg, segrep, &repfnz[k]);
-#ifdef DEBUG
-               zprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
-           }
-           jcol += panel_size; /* Move to the next panel */
-       } /* else */
-    } /* for */
-    *info = iinfo;
-    if ( m > n ) {
-       k = 0;
-        for (i = 0; i < m; ++i) 
-            if ( perm_r[i] == EMPTY ) {
-               perm_r[i] = n + k;
-               ++k;
-           }
-    }
-    countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
-    fixupL(min_mn, perm_r, &Glu);
-    zLUWorkFree(iwork, zwork, &Glu); /* Free work space and compress storage */
-    if ( fact == SamePattern_SameRowPerm ) {
-        /* L and U structures may have changed due to possibly different
-          pivoting, even though the storage is available.
-          There could also be memory expansions, so the array locations
-           may have changed, */
-        ((SCformat *)L->Store)->nnz = nnzL;
-       ((SCformat *)L->Store)->nsuper = Glu.supno[n];
-       ((SCformat *)L->Store)->nzval = Glu.lusup;
-       ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-       ((SCformat *)L->Store)->rowind = Glu.lsub;
-       ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-       ((NCformat *)U->Store)->nnz = nnzU;
-       ((NCformat *)U->Store)->nzval = Glu.ucol;
-       ((NCformat *)U->Store)->rowind = Glu.usub;
-       ((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-        zCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, 
-                                Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-                                Glu.xsup, SLU_SC, SLU_Z, SLU_TRLU);
-       zCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
-                              Glu.usub, Glu.xusub, SLU_NC, SLU_Z, SLU_TRU);
-    }
-    ops[FACT] += ops[TRSV] + ops[GEMV];        
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
diff --git a/superlu/zgstrs.c b/superlu/zgstrs.c
deleted file mode 100644
index e415e47a..00000000
--- a/superlu/zgstrs.c
+++ /dev/null
@@ -1,344 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_zdefs.h"
-extern void ztrsm_();
-extern void zgemm_();
- * Function prototypes 
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-zgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
- * Purpose
- * =======
- *
- * ZGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         zgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         zgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *        Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-    int      incx = 1, incy = 1;
-    doublecomplex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    doublecomplex   *work_col;
-    doublecomplex   temp_comp;
-    DNformat *Bstore;
-    doublecomplex   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    doublecomplex   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    doublecomplex   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void zprint_soln();
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU )
-       *info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
-       *info = -6;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("zgstrs", &i);
-       return;
-    }
-    n = L->nrow;
-    work = doublecomplexCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = doublecomplexMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    if ( trans == NOTRANS ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-           solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 8 * nrow * nsupc * nrhs;
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       zz_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
-                       z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               ztrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               zgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       z_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
-                       work_col[i].r = 0.0;
-                       work_col[i].i = 0.0;
-                       iptr++;
-                   }
-               }
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   zlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   zmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       z_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
-                       work[i].r = 0.;
-                       work[i].i = 0.;
-                       iptr++;
-                   }
-               }
-           } /* else ... */
-       } /* for L-solve */
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       zprint_soln(n, nrhs, Bmat);
-       /*
-        * Back solve Ux=y.
-        */
-       for (k = Lstore->nsuper; k >= 0; k--) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           luptr = L_NZ_START(fsupc);
-           solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;
-           if ( nsupc == 1 ) {
-               rhs_work = &Bmat[0];
-               for (j = 0; j < nrhs; j++) {
-                   z_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
-                   rhs_work += ldb;
-               }
-           } else {
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("U", strlen("U"));
-               ftcs3 = _cptofcd("N", strlen("N"));
-               CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               ztrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               for (j = 0; j < nrhs; j++)
-                   zusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-           }
-           for (j = 0; j < nrhs; ++j) {
-               rhs_work = &Bmat[j*ldb];
-               for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-                       irow = U_SUB(i);
-                       zz_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
-                       z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-                   }
-               }
-           }
-       } /* for U-solve */
-#ifdef DEBUG
-       printf("After U-solve: x=\n");
-       zprint_soln(n, nrhs, Bmat);
-       /* Compute the final solution X := Pc*X. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-        stat->ops[SOLVE] = solve_ops;
-    } else { /* Solve A'*X=B or CONJ(A)*X=B */
-       /* Permute right hand sides to form Pc'*B. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       stat->ops[SOLVE] = 0;
-        if (trans == TRANS) {
-           for (k = 0; k < nrhs; ++k) {
-               /* Multiply by inv(U'). */
-               sp_ztrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-               /* Multiply by inv(L'). */
-               sp_ztrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-           }
-         } else { /* trans == CONJ */
-            for (k = 0; k < nrhs; ++k) {                
-                /* Multiply by conj(inv(U')). */
-                sp_ztrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info);
-                /* Multiply by conj(inv(L')). */
-                sp_ztrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info);
-           }
-         }
-       /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-    }
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
- * Diagnostic print of the solution vector 
- */
-zprint_soln(int n, int nrhs, doublecomplex *soln)
-    int i;
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i].r);
diff --git a/superlu/zlacon.c b/superlu/zlacon.c
deleted file mode 100644
index 33822b3e..00000000
--- a/superlu/zlacon.c
+++ /dev/null
@@ -1,236 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <math.h>
-#include "slu_Cnames.h"
-#include "slu_dcomplex.h"
-extern void zcopy_();
-zlacon_(int *n, doublecomplex *v, doublecomplex *x, double *est, int *kase)
-    Purpose   
-    =======   
-    ZLACON estimates the 1-norm of a square matrix A.   
-    Reverse communication is used for evaluating matrix-vector products. 
-    Arguments   
-    =========   
-    N      (input) INT
-           The order of the matrix.  N >= 1.   
-    V      (workspace) DOUBLE COMPLEX PRECISION array, dimension (N)   
-           On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
-           (W is not returned).   
-    X      (input/output) DOUBLE COMPLEX PRECISION array, dimension (N)   
-           On an intermediate return, X should be overwritten by   
-                 A * X,   if KASE=1,   
-                 A' * X,  if KASE=2,
-           where A' is the conjugate transpose of A,
-           and ZLACON must be re-called with all the other parameters   
-           unchanged.   
-    EST    (output) DOUBLE PRECISION   
-           An estimate (a lower bound) for norm(A).   
-    KASE   (input/output) INT
-           On the initial call to ZLACON, KASE should be 0.   
-           On an intermediate return, KASE will be 1 or 2, indicating   
-           whether X should be overwritten by A * X  or A' * X.   
-           On the final return from ZLACON, KASE will again be 0.   
-    Further Details   
-    ======= =======   
-    Contributed by Nick Higham, University of Manchester.   
-    Originally named CONEST, dated March 16, 1988.   
-    Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
-    a real or complex matrix, with applications to condition estimation", 
-    ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
-    ===================================================================== 
-    /* Table of constant values */
-    int c__1 = 1;
-    doublecomplex      zero = {0.0, 0.0};
-    doublecomplex      one = {1.0, 0.0};
-    /* System generated locals */
-    double d__1;
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static double altsgn, estold;
-    static int i, j;
-    double temp;
-    double safmin;
-    extern double dlamch_(char *);
-    extern int izmax1_(int *, doublecomplex *, int *);
-    extern double dzsum1_(int *, doublecomplex *, int *);
-    safmin = dlamch_("Safe minimum");
-    if ( *kase == 0 ) {
-       for (i = 0; i < *n; ++i) {
-           x[i].r = 1. / (double) (*n);
-           x[i].i = 0.;
-       }
-       *kase = 1;
-       jump = 1;
-       return 0;
-    }
-    switch (jump) {
-       case 1:  goto L20;
-       case 2:  goto L40;
-       case 3:  goto L70;
-       case 4:  goto L110;
-       case 5:  goto L140;
-    }
-    /*     ................ ENTRY   (JUMP = 1)   
-  L20:
-    if (*n == 1) {
-       v[0] = x[0];
-       *est = z_abs(&v[0]);
-       /*        ... QUIT */
-       goto L150;
-    }
-    *est = dzsum1_(n, x, &c__1);
-    for (i = 0; i < *n; ++i) {
-       d__1 = z_abs(&x[i]);
-       if (d__1 > safmin) {
-           d__1 = 1 / d__1;
-           x[i].r *= d__1;
-           x[i].i *= d__1;
-       } else {
-           x[i] = one;
-       }
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 2)   
-    j = izmax1_(n, &x[0], &c__1);
-    --j;
-    iter = 2;
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 3)   
-#ifdef _CRAY
-    CCOPY(n, x, &c__1, v, &c__1);
-    zcopy_(n, x, &c__1, v, &c__1);
-    estold = *est;
-    *est = dzsum1_(n, v, &c__1);
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-    for (i = 0; i < *n; ++i) {
-       d__1 = z_abs(&x[i]);
-       if (d__1 > safmin) {
-           d__1 = 1 / d__1;
-           x[i].r *= d__1;
-           x[i].i *= d__1;
-       } else {
-           x[i] = one;
-       }
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 4)   
-    jlast = j;
-    j = izmax1_(n, &x[0], &c__1);
-    --j;
-    if (x[jlast].r != (d__1 = x[j].r, fabs(d__1)) && iter < 5) {
-       ++iter;
-       goto L50;
-    }
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-       x[i-1].r = altsgn * ((double)(i - 1) / (double)(*n - 1) + 1.);
-       x[i-1].i = 0.;
-       altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    /*     ................ ENTRY   (JUMP = 5)   
-    temp = dzsum1_(n, x, &c__1) / (double)(*n * 3) * 2.;
-    if (temp > *est) {
-#ifdef _CRAY
-       CCOPY(n, &x[0], &c__1, &v[0], &c__1);
-       zcopy_(n, &x[0], &c__1, &v[0], &c__1);
-       *est = temp;
-    }
-    *kase = 0;
-    return 0;
-} /* zlacon_ */
diff --git a/superlu/zlangs.c b/superlu/zlangs.c
deleted file mode 100644
index bb3f95a2..00000000
--- a/superlu/zlangs.c
+++ /dev/null
@@ -1,131 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  zlangs.c
- * History:     Modified from lapack routine ZLANGE
- */
-#include <math.h>
-#include "slu_zdefs.h"
-double zlangs(char *norm, SuperMatrix *A)
-    Purpose   
-    =======   
-    ZLANGS returns the value of the one norm, or the Frobenius norm, or 
-    the infinity norm, or the element of largest absolute value of a 
-    real matrix A.   
-    Description   
-    ===========   
-    ZLANGE returns the value   
-       ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
-                (   
-                ( norm1(A),         NORM = '1', 'O' or 'o'   
-                (   
-                ( normI(A),         NORM = 'I' or 'i'   
-                (   
-                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
-    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
-    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
-    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
-    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
-    Arguments   
-    =========   
-    NORM    (input) CHARACTER*1   
-            Specifies the value to be returned in ZLANGE as described above.   
-    A       (input) SuperMatrix*
-            The M by N sparse matrix A. 
-   ===================================================================== 
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    int      i, j, irow;
-    double   value, sum;
-    double   *rwork;
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
-       value = 0.;
-    } else if (lsame_(norm, "M")) {
-       /* Find max(abs(A(i,j))). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
-               value = SUPERLU_MAX( value, z_abs( &Aval[i]) );
-    } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
-       /* Find norm1(A). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j) {
-           sum = 0.;
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
-               sum += z_abs( &Aval[i] );
-           value = SUPERLU_MAX(value,sum);
-       }
-    } else if (lsame_(norm, "I")) {
-       /* Find normI(A). */
-       if ( !(rwork = (double *) SUPERLU_MALLOC(A->nrow * sizeof(double))) )
-           ABORT("SUPERLU_MALLOC fails for rwork.");
-       for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
-               irow = Astore->rowind[i];
-               rwork[irow] += z_abs( &Aval[i] );
-           }
-       value = 0.;
-       for (i = 0; i < A->nrow; ++i)
-           value = SUPERLU_MAX(value, rwork[i]);
-       SUPERLU_FREE (rwork);
-    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
-       /* Find normF(A). */
-       ABORT("Not implemented.");
-    } else
-       ABORT("Illegal norm specified.");
-    return (value);
-} /* zlangs */
diff --git a/superlu/zlaqgs.c b/superlu/zlaqgs.c
deleted file mode 100644
index 1753737d..00000000
--- a/superlu/zlaqgs.c
+++ /dev/null
@@ -1,159 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:  zlaqgs.c
- * History:     Modified from LAPACK routine ZLAQGE
- */
-#include <math.h>
-#include "slu_zdefs.h"
-zlaqgs(SuperMatrix *A, double *r, double *c, 
-       double rowcnd, double colcnd, double amax, char *equed)
-    Purpose   
-    =======   
-    ZLAQGS equilibrates a general sparse M by N matrix A using the row and   
-    scaling factors in the vectors R and C.   
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-    Arguments   
-    =========   
-    A       (input/output) SuperMatrix*
-            On exit, the equilibrated matrix.  See EQUED for the form of 
-            the equilibrated matrix. The type of A can be:
-           Stype = NC; Dtype = SLU_Z; Mtype = GE.
-    R       (input) double*, dimension (A->nrow)
-            The row scale factors for A.
-    C       (input) double*, dimension (A->ncol)
-            The column scale factors for A.
-    ROWCND  (input) double
-            Ratio of the smallest R(i) to the largest R(i).
-    COLCND  (input) double
-            Ratio of the smallest C(i) to the largest C(i).
-    AMAX    (input) double
-            Absolute value of largest matrix entry.
-    EQUED   (output) char*
-            Specifies the form of equilibration that was done.   
-            = 'N':  No equilibration   
-            = 'R':  Row equilibration, i.e., A has been premultiplied by  
-                    diag(R).   
-            = 'C':  Column equilibration, i.e., A has been postmultiplied  
-                    by diag(C).   
-            = 'B':  Both row and column equilibration, i.e., A has been
-                    replaced by diag(R) * A * diag(C).   
-    Internal Parameters   
-    ===================   
-    THRESH is a threshold value used to decide if row or column scaling   
-    should be done based on the ratio of the row or column scaling   
-    factors.  If ROWCND < THRESH, row scaling is done, and if   
-    COLCND < THRESH, column scaling is done.   
-    LARGE and SMALL are threshold values used to decide if row scaling   
-    should be done based on the absolute size of the largest matrix   
-    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
-    ===================================================================== 
-#define THRESH    (0.1)
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    int i, j, irow;
-    double large, small, cj;
-    extern double dlamch_(char *);
-    double temp;
-    /* Quick return if possible */
-    if (A->nrow <= 0 || A->ncol <= 0) {
-       *(unsigned char *)equed = 'N';
-       return;
-    }
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Initialize LARGE and SMALL. */
-    small = dlamch_("Safe minimum") / dlamch_("Precision");
-    large = 1. / small;
-    if (rowcnd >= THRESH && amax >= small && amax <= large) {
-       if (colcnd >= THRESH)
-           *(unsigned char *)equed = 'N';
-       else {
-           /* Column scaling */
-           for (j = 0; j < A->ncol; ++j) {
-               cj = c[j];
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   zd_mult(&Aval[i], &Aval[i], cj);
-                }
-           }
-           *(unsigned char *)equed = 'C';
-       }
-    } else if (colcnd >= THRESH) {
-       /* Row scaling, no column scaling */
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               zd_mult(&Aval[i], &Aval[i], r[irow]);
-           }
-       *(unsigned char *)equed = 'R';
-    } else {
-       /* Row and column scaling */
-       for (j = 0; j < A->ncol; ++j) {
-           cj = c[j];
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               temp = cj * r[irow];
-               zd_mult(&Aval[i], &Aval[i], temp);
-           }
-       }
-       *(unsigned char *)equed = 'B';
-    }
-    return;
-} /* zlaqgs */
diff --git a/superlu/zmemory.c b/superlu/zmemory.c
deleted file mode 100644
index 36968d65..00000000
--- a/superlu/zmemory.c
+++ /dev/null
@@ -1,689 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include "slu_zdefs.h"
-/* Constants */
-#define NO_MEMTYPE  4      /* 0: lusup;
-                             1: ucol;
-                             2: lsub;
-                             3: usub */
-#define GluIntArray(n)   (5 * (n) + 5)
-/* Internal prototypes */
-void  *zexpand (int *, MemType,int, int, GlobalLU_t *);
-int   zLUWorkInit (int, int, int, int **, doublecomplex **, LU_space_t);
-void  copy_mem_doublecomplex (int, void *, void *);
-void  zStackCompress (GlobalLU_t *);
-void  zSetupSpace (void *, int, LU_space_t *);
-void  *zuser_malloc (int, int);
-void  zuser_free (int, int);
-/* External prototypes (in memory.c - prec-indep) */
-extern void    copy_mem_int    (int, void *, void *);
-extern void    user_bcopy      (char *, char *, int);
-/* Headers for 4 types of dynamatically managed memory */
-typedef struct e_node {
-    int size;      /* length of the memory that has been used */
-    void *mem;     /* pointer to the new malloc'd store */
-} ExpHeader;
-typedef struct {
-    int  size;
-    int  used;
-    int  top1;  /* grow upward, relative to &array[0] */
-    int  top2;  /* grow downward */
-    void *array;
-} LU_stack_t;
-/* Variables local to this file */
-static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
-static LU_stack_t stack;
-static int no_expand;
-/* Macros to manipulate stack */
-#define StackFull(x)         ( x + stack.used >= stack.size )
-#define NotDoubleAlign(addr) ( (long int)addr & 7 )
-#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
-#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
-                             (w + 1) * m * sizeof(doublecomplex) )
-#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
- * Setup the memory model to be used for factorization.
- *    lwork = 0: use system malloc;
- *    lwork > 0: use user-supplied work[] space.
- */
-void zSetupSpace(void *work, int lwork, LU_space_t *MemModel)
-    if ( lwork == 0 ) {
-       *MemModel = SYSTEM; /* malloc/free */
-    } else if ( lwork > 0 ) {
-       *MemModel = USER;   /* user provided space */
-       stack.used = 0;
-       stack.top1 = 0;
-       stack.top2 = (lwork/4)*4; /* must be word addressable */
-       stack.size = stack.top2;
-       stack.array = (void *) work;
-    }
-void *zuser_malloc(int bytes, int which_end)
-    void *buf;
-    if ( StackFull(bytes) ) return (NULL);
-    if ( which_end == HEAD ) {
-       buf = (char*) stack.array + stack.top1;
-       stack.top1 += bytes;
-    } else {
-       stack.top2 -= bytes;
-       buf = (char*) stack.array + stack.top2;
-    }
-    stack.used += bytes;
-    return buf;
-void zuser_free(int bytes, int which_end)
-    if ( which_end == HEAD ) {
-       stack.top1 -= bytes;
-    } else {
-       stack.top2 += bytes;
-    }
-    stack.used -= bytes;
- * mem_usage consists of the following fields:
- *    - for_lu (float)
- *      The amount of space used in bytes for the L\U data structures.
- *    - total_needed (float)
- *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
- */
-int zQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
-    SCformat *Lstore;
-    NCformat *Ustore;
-    register int n, iword, dword, panel_size = sp_ienv(1);
-    Lstore = L->Store;
-    Ustore = U->Store;
-    n = L->ncol;
-    iword = sizeof(int);
-    dword = sizeof(doublecomplex);
-    /* For LU factors */
-    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
-                                dword + Lstore->rowind_colptr[n] * iword );
-    mem_usage->for_lu += (float)( (n + 1) * iword +
-                                Ustore->colptr[n] * (dword + iword) );
-    /* Working storage to support factorization */
-    mem_usage->total_needed = mem_usage->for_lu +
-       (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
-               (panel_size + 1) * n * dword );
-    mem_usage->expansions = --no_expand;
-    return 0;
-} /* zQuerySpace */
- * Allocate storage for the data structures common to all factor routines.
- * For those unpredictable size, make a guess as FILL * nnz(A).
- * Return value:
- *     If lwork = -1, return the estimated amount of space required, plus n;
- *     otherwise, return the amount of space actually allocated when
- *     memory allocation failure occurred.
- */
-zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
-         int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
-         int **iwork, doublecomplex **dwork)
-    int      info, iword, dword;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    int      *xsup, *supno;
-    int      *lsub, *xlsub;
-    doublecomplex   *lusup;
-    int      *xlusup;
-    doublecomplex   *ucol;
-    int      *usub, *xusub;
-    int      nzlmax, nzumax, nzlumax;
-    int      FILL = sp_ienv(6);
-    Glu->n    = n;
-    no_expand = 0;
-    iword     = sizeof(int);
-    dword     = sizeof(doublecomplex);
-    if ( !expanders )  
-        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
-    if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
-    if ( fact != SamePattern_SameRowPerm ) {
-       /* Guess for L\U factors */
-       nzumax = nzlumax = FILL * annz;
-       nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else {
-           zSetupSpace(work, lwork, &Glu->MemModel);
-       }
-#if ( PRNTlevel >= 1 )
-       printf("zLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
-              FILL, nzlmax, nzumax);
-       fflush(stdout);
-       /* Integer pointers for L\U factors */
-       if ( Glu->MemModel == SYSTEM ) {
-           xsup   = intMalloc(n+1);
-           supno  = intMalloc(n+1);
-           xlsub  = intMalloc(n+1);
-           xlusup = intMalloc(n+1);
-           xusub  = intMalloc(n+1);
-       } else {
-           xsup   = (int *)zuser_malloc((n+1) * iword, HEAD);
-           supno  = (int *)zuser_malloc((n+1) * iword, HEAD);
-           xlsub  = (int *)zuser_malloc((n+1) * iword, HEAD);
-           xlusup = (int *)zuser_malloc((n+1) * iword, HEAD);
-           xusub  = (int *)zuser_malloc((n+1) * iword, HEAD);
-       }
-       lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
-       ucol  = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
-       lsub  = (int *)    zexpand( &nzlmax, LSUB, 0, 0, Glu );
-       usub  = (int *)    zexpand( &nzumax, USUB, 0, 1, Glu );
-       while ( !lusup || !ucol || !lsub || !usub ) {
-           if ( Glu->MemModel == SYSTEM ) {
-               SUPERLU_FREE(lusup); 
-               SUPERLU_FREE(ucol); 
-               SUPERLU_FREE(lsub); 
-               SUPERLU_FREE(usub);
-           } else {
-               zuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
-           }
-           nzlumax /= 2;
-           nzumax /= 2;
-           nzlmax /= 2;
-           if ( nzlumax < annz ) {
-               printf("Not enough memory to perform factorization.\n");
-               return (zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-           }
-#if ( PRNTlevel >= 1)
-           printf("zLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
-                  nzlmax, nzumax);
-           fflush(stdout);
-           lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
-           ucol  = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
-           lsub  = (int *)    zexpand( &nzlmax, LSUB, 0, 0, Glu );
-           usub  = (int *)    zexpand( &nzumax, USUB, 0, 1, Glu );
-       }
-    } else {
-       /* fact == SamePattern_SameRowPerm */
-       Lstore   = L->Store;
-       Ustore   = U->Store;
-       xsup     = Lstore->sup_to_col;
-       supno    = Lstore->col_to_sup;
-       xlsub    = Lstore->rowind_colptr;
-       xlusup   = Lstore->nzval_colptr;
-       xusub    = Ustore->colptr;
-       nzlmax   = Glu->nzlmax;    /* max from previous factorization */
-       nzumax   = Glu->nzumax;
-       nzlumax  = Glu->nzlumax;
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else if ( lwork == 0 ) {
-           Glu->MemModel = SYSTEM;
-       } else {
-           Glu->MemModel = USER;
-           stack.top2 = (lwork/4)*4; /* must be word-addressable */
-           stack.size = stack.top2;
-       }
-       lsub  = expanders[LSUB].mem  = Lstore->rowind;
-       lusup = expanders[LUSUP].mem = Lstore->nzval;
-       usub  = expanders[USUB].mem  = Ustore->rowind;
-       ucol  = expanders[UCOL].mem  = Ustore->nzval;;
-       expanders[LSUB].size         = nzlmax;
-       expanders[LUSUP].size        = nzlumax;
-       expanders[USUB].size         = nzumax;
-       expanders[UCOL].size         = nzumax;  
-    }
-    Glu->xsup    = xsup;
-    Glu->supno   = supno;
-    Glu->lsub    = lsub;
-    Glu->xlsub   = xlsub;
-    Glu->lusup   = lusup;
-    Glu->xlusup  = xlusup;
-    Glu->ucol    = ucol;
-    Glu->usub    = usub;
-    Glu->xusub   = xusub;
-    Glu->nzlmax  = nzlmax;
-    Glu->nzumax  = nzumax;
-    Glu->nzlumax = nzlumax;
-    info = zLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
-    if ( info )
-       return ( info + zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-    ++no_expand;
-    return 0;
-} /* zLUMemInit */
-/* Allocate known working storage. Returns 0 if success, otherwise
-   returns the number of bytes allocated so far when failure occurred. */
-zLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
-            doublecomplex **dworkptr, LU_space_t MemModel)
-    int    isize, dsize, extra;
-    doublecomplex *old_ptr;
-    int    maxsuper = sp_ienv(3),
-           rowblk   = sp_ienv(4);
-    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
-    dsize = (m * panel_size +
-            NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(doublecomplex);
-    if ( MemModel == SYSTEM ) 
-       *iworkptr = (int *) intCalloc(isize/sizeof(int));
-    else
-       *iworkptr = (int *) zuser_malloc(isize, TAIL);
-    if ( ! *iworkptr ) {
-       fprintf(stderr, "zLUWorkInit: malloc fails for local iworkptr[]\n");
-       return (isize + n);
-    }
-    if ( MemModel == SYSTEM )
-       *dworkptr = (doublecomplex *) SUPERLU_MALLOC(dsize);
-    else {
-       *dworkptr = (doublecomplex *) zuser_malloc(dsize, TAIL);
-       if ( NotDoubleAlign(*dworkptr) ) {
-           old_ptr = *dworkptr;
-           *dworkptr = (doublecomplex*) DoubleAlign(*dworkptr);
-           *dworkptr = (doublecomplex*) ((double*)*dworkptr - 1);
-           extra = (char*)old_ptr - (char*)*dworkptr;
-#ifdef DEBUG       
-           printf("zLUWorkInit: not aligned, extra %d\n", extra);
-           stack.top2 -= extra;
-           stack.used += extra;
-       }
-    }
-    if ( ! *dworkptr ) {
-       fprintf(stderr, "malloc fails for local dworkptr[].");
-       return (isize + dsize + n);
-    }
-    return 0;
- * Set up pointers for real working arrays.
- */
-zSetRWork(int m, int panel_size, doublecomplex *dworkptr,
-        doublecomplex **dense, doublecomplex **tempv)
-    doublecomplex zero = {0.0, 0.0};
-    int maxsuper = sp_ienv(3),
-        rowblk   = sp_ienv(4);
-    *dense = dworkptr;
-    *tempv = *dense + panel_size*m;
-    zfill (*dense, m * panel_size, zero);
-    zfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
- * Free the working storage used by factor routines.
- */
-void zLUWorkFree(int *iwork, doublecomplex *dwork, GlobalLU_t *Glu)
-    if ( Glu->MemModel == SYSTEM ) {
-       SUPERLU_FREE (iwork);
-       SUPERLU_FREE (dwork);
-    } else {
-       stack.used -= (stack.size - stack.top2);
-       stack.top2 = stack.size;
-/*     zStackCompress(Glu);  */
-    }
-    SUPERLU_FREE (expanders);  
-    expanders = 0;
-/* Expand the data structures for L and U during the factorization.
- * Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-zLUMemXpand(int jcol,
-          int next,          /* number of elements currently in the factors */
-          MemType mem_type,  /* which type of memory to expand  */
-          int *maxlen,       /* modified - maximum length of a data structure 
-          GlobalLU_t *Glu    /* modified - global LU data structures */
-          )
-    void   *new_mem;
-#ifdef DEBUG    
-    printf("zLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
-          jcol, next, *maxlen, mem_type);
-    if (mem_type == USUB) 
-       new_mem = zexpand(maxlen, mem_type, next, 1, Glu);
-    else
-       new_mem = zexpand(maxlen, mem_type, next, 0, Glu);
-    if ( !new_mem ) {
-       int    nzlmax  = Glu->nzlmax;
-       int    nzumax  = Glu->nzumax;
-       int    nzlumax = Glu->nzlumax;
-       fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
-       return (zmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
-    }
-    switch ( mem_type ) {
-      case LUSUP:
-       Glu->lusup   = (doublecomplex *) new_mem;
-       Glu->nzlumax = *maxlen;
-       break;
-      case UCOL:
-       Glu->ucol   = (doublecomplex *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-      case LSUB:
-       Glu->lsub   = (int *) new_mem;
-       Glu->nzlmax = *maxlen;
-       break;
-      case USUB:
-       Glu->usub   = (int *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-    }
-    return 0;
-copy_mem_doublecomplex(int howmany, void *old, void *new)
-    register int i;
-    doublecomplex *dold = old;
-    doublecomplex *dnew = new;
-    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
- * Expand the existing storage to accommodate more fill-ins.
- */
-*zexpand (
-        int *prev_len,   /* length used from previous call */
-        MemType type,    /* which part of the memory to expand */
-        int len_to_copy, /* size of the memory to be copied to new store */
-        int keep_prev,   /* = 1: use prev_len;
-                            = 0: compute new_len to expand */
-        GlobalLU_t *Glu  /* modified - global LU data structures */
-       )
-    float    EXPAND = 1.5;
-    float    alpha;
-    void     *new_mem, *old_mem;
-    int      new_len, tries, lword, extra, bytes_to_copy;
-    alpha = EXPAND;
-    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
-        new_len = *prev_len;
-    else {
-       new_len = alpha * *prev_len;
-    }
-    if ( type == LSUB || type == USUB ) lword = sizeof(int);
-    else lword = sizeof(doublecomplex);
-    if ( Glu->MemModel == SYSTEM ) {
-       new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-       if ( no_expand != 0 ) {
-           tries = 0;
-           if ( keep_prev ) {
-               if ( !new_mem ) return (NULL);
-           } else {
-               while ( !new_mem ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-               }
-           }
-           if ( type == LSUB || type == USUB ) {
-               copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
-           } else {
-               copy_mem_doublecomplex(len_to_copy, expanders[type].mem, 
-           }
-           SUPERLU_FREE (expanders[type].mem);
-       }
-       expanders[type].mem = (void *) new_mem;
-    } else { /* MemModel == USER */
-       if ( no_expand == 0 ) {
-           new_mem = zuser_malloc(new_len * lword, HEAD);
-           if ( NotDoubleAlign(new_mem) &&
-               (type == LUSUP || type == UCOL) ) {
-               old_mem = new_mem;
-               new_mem = (void *)DoubleAlign(new_mem);
-               extra = (char*)new_mem - (char*)old_mem;
-#ifdef DEBUG           
-               printf("expand(): not aligned, extra %d\n", extra);
-               stack.top1 += extra;
-               stack.used += extra;
-           }
-           expanders[type].mem = (void *) new_mem;
-       }
-       else {
-           tries = 0;
-           extra = (new_len - *prev_len) * lword;
-           if ( keep_prev ) {
-               if ( StackFull(extra) ) return (NULL);
-           } else {
-               while ( StackFull(extra) ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   extra = (new_len - *prev_len) * lword;          
-               }
-           }
-           if ( type != USUB ) {
-               new_mem = (void*)((char*)expanders[type + 1].mem + extra);
-               bytes_to_copy = (char*)stack.array + stack.top1
-                   - (char*)expanders[type + 1].mem;
-               user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
-               if ( type < USUB ) {
-                   Glu->usub = expanders[USUB].mem =
-                       (void*)((char*)expanders[USUB].mem + extra);
-               }
-               if ( type < LSUB ) {
-                   Glu->lsub = expanders[LSUB].mem =
-                       (void*)((char*)expanders[LSUB].mem + extra);
-               }
-               if ( type < UCOL ) {
-                   Glu->ucol = expanders[UCOL].mem =
-                       (void*)((char*)expanders[UCOL].mem + extra);
-               }
-               stack.top1 += extra;
-               stack.used += extra;
-               if ( type == UCOL ) {
-                   stack.top1 += extra;   /* Add same amount for USUB */
-                   stack.used += extra;
-               }
-           } /* if ... */
-       } /* else ... */
-    }
-    expanders[type].size = new_len;
-    *prev_len = new_len;
-    if ( no_expand ) ++no_expand;
-    return (void *) expanders[type].mem;
-} /* zexpand */
- * Compress the work[] array to remove fragmentation.
- */
-zStackCompress(GlobalLU_t *Glu)
-    register int iword, dword, ndim;
-    char    *last, *fragment;
-    int      *ifrom, *ito;
-    doublecomplex   *dfrom, *dto;
-    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
-    doublecomplex   *ucol, *lusup;
-    iword = sizeof(int);
-    dword = sizeof(doublecomplex);
-    ndim = Glu->n;
-    xlsub  = Glu->xlsub;
-    lsub   = Glu->lsub;
-    xusub  = Glu->xusub;
-    usub   = Glu->usub;
-    xlusup = Glu->xlusup;
-    ucol   = Glu->ucol;
-    lusup  = Glu->lusup;
-    dfrom = ucol;
-    dto = (doublecomplex *)((char*)lusup + xlusup[ndim] * dword);
-    copy_mem_doublecomplex(xusub[ndim], dfrom, dto);
-    ucol = dto;
-    ifrom = lsub;
-    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
-    copy_mem_int(xlsub[ndim], ifrom, ito);
-    lsub = ito;
-    ifrom = usub;
-    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
-    copy_mem_int(xusub[ndim], ifrom, ito);
-    usub = ito;
-    last = (char*)usub + xusub[ndim] * iword;
-    fragment = (char*) (((char*)stack.array + stack.top1) - last);
-    stack.used -= (long int) fragment;
-    stack.top1 -= (long int) fragment;
-    Glu->ucol = ucol;
-    Glu->lsub = lsub;
-    Glu->usub = usub;
-#ifdef DEBUG
-    printf("zStackCompress: fragment %d\n", fragment);
-    /* for (last = 0; last < ndim; ++last)
-       print_lu_col("After compress:", last, 0);*/
- * Allocate storage for original matrix A
- */
-zallocateA(int n, int nnz, doublecomplex **a, int **asub, int **xa)
-    *a    = (doublecomplex *) doublecomplexMalloc(nnz);
-    *asub = (int *) intMalloc(nnz);
-    *xa   = (int *) intMalloc(n+1);
-doublecomplex *doublecomplexMalloc(int n)
-    doublecomplex *buf;
-    buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex)); 
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in doublecomplexMalloc()\n");
-    }
-    return (buf);
-doublecomplex *doublecomplexCalloc(int n)
-    doublecomplex *buf;
-    register int i;
-    doublecomplex zero = {0.0, 0.0};
-    buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in doublecomplexCalloc()\n");
-    }
-    for (i = 0; i < n; ++i) buf[i] = zero;
-    return (buf);
-int zmemory_usage(const int nzlmax, const int nzumax, 
-                 const int nzlumax, const int n)
-    register int iword, dword;
-    iword   = sizeof(int);
-    dword   = sizeof(doublecomplex);
-    return (10 * n * iword +
-           nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
diff --git a/superlu/zmyblas2.c b/superlu/zmyblas2.c
deleted file mode 100644
index 5f2f3241..00000000
--- a/superlu/zmyblas2.c
+++ /dev/null
@@ -1,203 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:          zmyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-#include "slu_dcomplex.h"
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void zlsolve ( int ldm, int ncol, doublecomplex *M, doublecomplex *rhs )
-    int k;
-    doublecomplex x0, x1, x2, x3, temp;
-    doublecomplex *M0;
-    doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-    M0 = &M[0];
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-       Mki0 = M0 + 1;
-       Mki1 = Mki0 + ldm + 1;
-       Mki2 = Mki1 + ldm + 1;
-       Mki3 = Mki2 + ldm + 1;
-       x0 = rhs[firstcol];
-       zz_mult(&temp, &x0, Mki0); Mki0++;
-       z_sub(&x1, &rhs[firstcol+1], &temp);
-       zz_mult(&temp, &x0, Mki0); Mki0++;
-       z_sub(&x2, &rhs[firstcol+2], &temp);
-       zz_mult(&temp, &x1, Mki1); Mki1++;
-       z_sub(&x2, &x2, &temp);
-       zz_mult(&temp, &x0, Mki0); Mki0++;
-       z_sub(&x3, &rhs[firstcol+3], &temp);
-       zz_mult(&temp, &x1, Mki1); Mki1++;
-       z_sub(&x3, &x3, &temp);
-       zz_mult(&temp, &x2, Mki2); Mki2++;
-       z_sub(&x3, &x3, &temp);
-       rhs[++firstcol] = x1;
-       rhs[++firstcol] = x2;
-       rhs[++firstcol] = x3;
-       ++firstcol;
-       for (k = firstcol; k < ncol; k++) {
-           zz_mult(&temp, &x0, Mki0); Mki0++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-           zz_mult(&temp, &x1, Mki1); Mki1++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-           zz_mult(&temp, &x2, Mki2); Mki2++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-           zz_mult(&temp, &x3, Mki3); Mki3++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-       }
-        M0 += 4 * ldm + 4;
-    }
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-        Mki0 = M0 + 1;
-        Mki1 = Mki0 + ldm + 1;
-        x0 = rhs[firstcol];
-       zz_mult(&temp, &x0, Mki0); Mki0++;
-       z_sub(&x1, &rhs[firstcol+1], &temp);
-       rhs[++firstcol] = x1;
-       ++firstcol;
-       for (k = firstcol; k < ncol; k++) {
-           zz_mult(&temp, &x0, Mki0); Mki0++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-           zz_mult(&temp, &x1, Mki1); Mki1++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-       } 
-    }
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-zusolve ( ldm, ncol, M, rhs )
-int ldm;       /* in */
-int ncol;      /* in */
-doublecomplex *M;      /* in */
-doublecomplex *rhs;    /* modified */
-    doublecomplex xj, temp;
-    int jcol, j, irow;
-    jcol = ncol - 1;
-    for (j = 0; j < ncol; j++) {
-       z_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
-       rhs[jcol] = xj;
-       for (irow = 0; irow < jcol; irow++) {
-           zz_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
-           z_sub(&rhs[irow], &rhs[irow], &temp);
-       }
-       jcol--;
-    }
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void zmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm;       /* in -- leading dimension of M */
-int nrow;      /* in */ 
-int ncol;      /* in */
-doublecomplex *M;      /* in */
-doublecomplex *vec;    /* in */
-doublecomplex *Mxvec;  /* in/out */
-    doublecomplex vi0, vi1, vi2, vi3;
-    doublecomplex *M0, temp;
-    doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-    int k;
-    M0 = &M[0];
-    while ( firstcol < ncol - 3 ) {    /* Do 4 columns */
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-       Mki2 = Mki1 + ldm;
-       Mki3 = Mki2 + ldm;
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) {
-           zz_mult(&temp, &vi0, Mki0); Mki0++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-           zz_mult(&temp, &vi1, Mki1); Mki1++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-           zz_mult(&temp, &vi2, Mki2); Mki2++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-           zz_mult(&temp, &vi3, Mki3); Mki3++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-       }
-       M0 += 4 * ldm;
-    }
-    while ( firstcol < ncol ) {                /* Do 1 column */
-       Mki0 = M0;
-       vi0 = vec[firstcol++];
-       for (k = 0; k < nrow; k++) {
-           zz_mult(&temp, &vi0, Mki0); Mki0++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-       }
-       M0 += ldm;
-    }
diff --git a/superlu/zpanel_bmod.c b/superlu/zpanel_bmod.c
deleted file mode 100644
index ba9dc0e8..00000000
--- a/superlu/zpanel_bmod.c
+++ /dev/null
@@ -1,477 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_zdefs.h"
-extern void ztrsv_();
-extern void zgemv_();
- * Function prototypes 
- */
-void zlsolve(int, int, doublecomplex *, doublecomplex *);
-void zmatvec(int, int, int, doublecomplex *, doublecomplex *, doublecomplex *);
-extern void zcheck_tempv();
-zpanel_bmod (
-           const int  m,          /* in - number of rows in the matrix */
-           const int  w,          /* in */
-           const int  jcol,       /* in */
-           const int  nseg,       /* in */
-           doublecomplex     *dense,     /* out, of size n by w */
-           doublecomplex     *tempv,     /* working array */
-           int        *segrep,    /* in */
-           int        *repfnz,    /* in, of size n by w */
-           GlobalLU_t *Glu,       /* modified */
-           SuperLUStat_t *stat    /* output */
-           )
- * Purpose
- * =======
- *
- *    Performs numeric block updates (sup-panel) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- *    Before entering this routine, the original nonzeros in the panel 
- *    were already copied into the spa[m,w].
- *
- *    Updated/Output parameters-
- *     dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
- *      collectively in the m-by-w vector dense[*]. 
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-    int          incx = 1, incy = 1;
-    doublecomplex       alpha, beta;
-    register int k, ksub;
-    int          fsupc, nsupc, nsupr, nrow;
-    int          krep, krep_ind;
-    doublecomplex       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          segsze;
-    int          block_nrow;  /* no of rows in a block row */
-    register int lptr;       /* Points to the row subscripts of a supernode */
-    int          kfnz, irow, no_zeros; 
-    register int isub, isub1, i;
-    register int jj;         /* Index through each column in the panel */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    doublecomplex       *lusup;
-    int          *xlusup;
-    int          *repfnz_col; /* repfnz[] for a column in the panel */
-    doublecomplex       *dense_col;  /* dense[] for a column in the panel */
-    doublecomplex       *tempv1;             /* Used in 1-D update */
-    doublecomplex       *TriTmp, *MatvecTmp; /* used in 2-D update */
-    doublecomplex      zero = {0.0, 0.0};
-    doublecomplex      one = {1.0, 0.0};
-    doublecomplex      comp_temp, comp_temp1;
-    register int ldaTmp;
-    register int r_ind, r_hi;
-    static   int first = 1, maxsuper, rowblk, colblk;
-    flops_t  *ops = stat->ops;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       rowblk   = sp_ienv(4);
-       colblk   = sp_ienv(5);
-       first = 0;
-    }
-    ldaTmp = maxsuper + rowblk;
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
-       /* krep = representative of current k-th supernode
-        * fsupc = first supernodal column
-        * nsupc = no of columns in a supernode
-        * nsupr = no of rows in a supernode
-        */
-        krep = segrep[k--];
-       fsupc = xsup[supno[krep]];
-       nsupc = krep - fsupc + 1;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nrow = nsupr - nsupc;
-       lptr = xlsub[fsupc];
-       krep_ind = lptr + nsupc - 1;
-       repfnz_col = repfnz;
-       dense_col = dense;
-       if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
-           TriTmp = tempv;
-           /* Sequence through each column in panel -- triangular solves */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-               ops[TRSV] += 4 * segsze * (segsze - 1);
-               ops[GEMV] += 8 * nrow * segsze;
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       z_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       ++luptr;
-                   }
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   luptr1 = luptr - nsupr;
-                   if ( segsze == 2 ) {
-                       zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       z_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++;
-                           zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           z_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       zz_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                       z_sub(&ukj1, &ukj1, &comp_temp);
-                       zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       z_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++; luptr2++;
-                           zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           z_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       }
-                   }
-               } else  {       /* segsze >= 4 */
-                   /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
-                      holds the result of triangular solves.    */
-                   no_zeros = kfnz - fsupc;
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       TriTmp[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-#ifdef _CRAY
-                   CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-                   ztrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-                   zlsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
-               } /* else ... */
-           }  /* for jj ... end tri-solves */
-           /* Block row updates; push all the way into dense[*] block */
-           for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
-               r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
-               block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
-               luptr = xlusup[fsupc] + nsupc + r_ind;
-               isub1 = lptr + nsupc + r_ind;
-               repfnz_col = repfnz;
-               TriTmp = tempv;
-               dense_col = dense;
-               /* Sequence through each column in panel -- matrix-vector */
-               for (jj = jcol; jj < jcol + w; jj++,
-                    repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-                   kfnz = repfnz_col[krep];
-                   if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-                   segsze = krep - kfnz + 1;
-                   if ( segsze <= 3 ) continue;   /* skip unrolled cases */
-                   /* Perform a block update, and scatter the result of
-                      matrix-vector to dense[].                 */
-                   no_zeros = kfnz - fsupc;
-                   luptr1 = luptr + nsupr * no_zeros;
-                   MatvecTmp = &TriTmp[maxsuper];
-                   alpha = one; 
-                    beta = zero;
-#ifdef _CRAY
-                   CGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-                   zgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-                   zmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
-                          TriTmp, MatvecTmp);
-                   /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
-                    * such that MatvecTmp[*] can be re-used for the
-                    * the next blok row update. dense[] will be copied into 
-                    * global store after the whole panel has been finished.
-                    */
-                   isub = isub1;
-                   for (i = 0; i < block_nrow; i++) {
-                       irow = lsub[isub];
-                       z_sub(&dense_col[irow], &dense_col[irow], 
-                              &MatvecTmp[i]);
-                       MatvecTmp[i] = zero;
-                       ++isub;
-                   }
-               } /* for jj ... */
-           } /* for each block row ... */
-           /* Scatter the triangular solves into SPA dense[*] */
-           repfnz_col = repfnz;
-           TriTmp = tempv;
-           dense_col = dense;
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               if ( segsze <= 3 ) continue; /* skip unrolled cases */
-               no_zeros = kfnz - fsupc;                
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   dense_col[irow] = TriTmp[i];
-                   TriTmp[i] = zero;
-                   ++isub;
-               }
-           } /* for jj ... */
-       } else { /* 1-D block modification */
-           /* Sequence through each column in the panel */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-               ops[TRSV] += 4 * segsze * (segsze - 1);
-               ops[GEMV] += 8 * nrow * segsze;
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       z_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       ++luptr;
-                   }
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr1 = luptr - nsupr;
-                   if ( segsze == 2 ) {
-                       zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       z_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr;  ++luptr1;
-                           zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           z_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       zz_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                       z_sub(&ukj1, &ukj1, &comp_temp);
-                       zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       z_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr; ++luptr1; ++luptr2;
-                           zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           z_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       }
-                   }
-               } else  { /* segsze >= 4 */
-                   /* 
-                    * Perform a triangular solve and block update,
-                    * then scatter the result of sup-col update to dense[].
-                    */
-                   no_zeros = kfnz - fsupc;
-                   /* Copy U[*,j] segment from dense[*] to tempv[*]: 
-                    *    The result of triangular solve is in tempv[*];
-                    *    The result of matrix vector update is in dense_col[*]
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       tempv[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-#ifdef _CRAY
-                   CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-                   ztrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-                   luptr += segsze;    /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                    alpha = one;
-                    beta = zero;
-#ifdef _CRAY
-                   CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-                   zgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-                   zlsolve ( nsupr, segsze, &lusup[luptr], tempv );
-                   luptr += segsze;        /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                   zmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
-                   /* Scatter tempv[*] into SPA dense[*] temporarily, such
-                    * that tempv[*] can be used for the triangular solve of
-                    * the next column of the panel. They will be copied into 
-                    * ucol[*] after the whole panel has been finished.
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] = tempv[i];
-                       tempv[i] = zero;
-                       isub++;
-                   }
-                   /* Scatter the update from tempv1[*] into SPA dense[*] */
-                   /* Start dense rectangular L */
-                   for (i = 0; i < nrow; i++) {
-                       irow = lsub[isub];
-                       z_sub(&dense_col[irow], &dense_col[irow], &tempv1[i]);
-                       tempv1[i] = zero;
-                       ++isub; 
-                   }
-               } /* else segsze>=4 ... */
-           } /* for each column in the panel... */
-       } /* else 1-D update ... */
-    } /* for each updating supernode ... */
diff --git a/superlu/zpanel_dfs.c b/superlu/zpanel_dfs.c
deleted file mode 100644
index 4fbc963e..00000000
--- a/superlu/zpanel_dfs.c
+++ /dev/null
@@ -1,256 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_zdefs.h"
-zpanel_dfs (
-          const int  m,           /* in - number of rows in the matrix */
-          const int  w,           /* in */
-          const int  jcol,        /* in */
-          SuperMatrix *A,       /* in - original matrix */
-          int        *perm_r,     /* in */
-          int        *nseg,       /* out */
-          doublecomplex     *dense,      /* out */
-          int        *panel_lsub, /* out */
-          int        *segrep,     /* out */
-          int        *repfnz,     /* out */
-          int        *xprune,     /* out */
-          int        *marker,     /* out */     
-          int        *parent,     /* working array */
-          int        *xplore,     /* working array */
-          GlobalLU_t *Glu         /* modified */
-          )
- * Purpose
- * =======
- *
- *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives.
- *
- *   The routine returns one list of the supernodal representatives
- *   in topological order of the dfs that generates them. This list is
- *   a superset of the topological order of each individual column within
- *   the panel. 
- *   The location of the first nonzero in each supernodal segment
- *   (supernodal entry location) is also returned. Each column has a 
- *   separate list for this purpose.
- *
- *   Two marker arrays are used for dfs:
- *     marker[i] == jj, if i was visited during dfs of current column jj;
- *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
- *
- *   marker: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- */
-    NCPformat *Astore;
-    doublecomplex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int              krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
-    int       k, krow, kmark, kperm;
-    int       xdfs, maxdfs, kpar;
-    int       jj;         /* index through each column in the panel */
-    int       *marker1;           /* marker1[jj] >= jcol if vertex jj was visited
-                             by a previous column within this panel.   */
-    int       *repfnz_col; /* start of each column in the panel */
-    doublecomplex    *dense_col;  /* start of each column in the panel */
-    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    /* Initialize pointers */
-    Astore     = A->Store;
-    a          = Astore->nzval;
-    asub       = Astore->rowind;
-    xa_begin   = Astore->colbeg;
-    xa_end     = Astore->colend;
-    marker1    = marker + m;
-    repfnz_col = repfnz;
-    dense_col  = dense;
-    *nseg      = 0;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    /* For each column in the panel */
-    for (jj = jcol; jj < jcol + w; jj++) {
-       nextl_col = (jj - jcol) * m;
-#ifdef CHK_DFS
-       printf("\npanel col %d: ", jj);
-       /* For each nonz in A[*,jj] do dfs */
-       for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
-           krow = asub[k];
-            dense_col[krow] = a[k];
-           kmark = marker[krow];       
-           if ( kmark == jj ) 
-               continue;     /* krow visited before, go to the next nonzero */
-           /* For each unmarked nbr krow of jj
-            * krow is in L: place it in structure of L[*,jj]
-            */
-           marker[krow] = jj;
-           kperm = perm_r[krow];
-           if ( kperm == EMPTY ) {
-               panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
-           }
-           /* 
-            * krow is in U: if its supernode-rep krep
-            * has been explored, update repfnz[*]
-            */
-           else {
-               krep = xsup[supno[kperm]+1] - 1;
-               myfnz = repfnz_col[krep];
-#ifdef CHK_DFS
-               printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, kperm);
-               if ( myfnz != EMPTY ) { /* Representative visited before */
-                   if ( myfnz > kperm ) repfnz_col[krep] = kperm;
-                   /* continue; */
-               }
-               else {
-                   /* Otherwise, perform dfs starting at krep */
-                   oldrep = EMPTY;
-                   parent[krep] = oldrep;
-                   repfnz_col[krep] = kperm;
-                   xdfs = xlsub[krep];
-                   maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                   printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
-                   for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                   printf("\n");
-                   do {
-                       /* 
-                        * For each unmarked kchild of krep 
-                        */
-                       while ( xdfs < maxdfs ) {
-                           kchild = lsub[xdfs];
-                           xdfs++;
-                           chmark = marker[kchild];
-                           if ( chmark != jj ) { /* Not reached yet */
-                               marker[kchild] = jj;
-                               chperm = perm_r[kchild];
-                               /* Case kchild is in L: place it in L[*,j] */
-                               if ( chperm == EMPTY ) {
-                                   panel_lsub[nextl_col++] = kchild;
-                               } 
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               else {
-                                   chrep = xsup[supno[chperm]+1] - 1;
-                                   myfnz = repfnz_col[chrep];
-#ifdef CHK_DFS
-                                   printf("chrep %d,myfnz %d,perm_r[%d] %d\n",chrep,myfnz,kchild,chperm);
-                                   if ( myfnz != EMPTY ) { /* Visited before */
-                                       if ( myfnz > chperm )
-                                           repfnz_col[chrep] = chperm;
-                                   }
-                                   else {
-                                       /* Cont. dfs at snode-rep of kchild */
-                                       xplore[krep] = xdfs;    
-                                       oldrep = krep;
-                                       krep = chrep; /* Go deeper down G(L) */
-                                       parent[krep] = oldrep;
-                                       repfnz_col[krep] = chperm;
-                                       xdfs = xlsub[krep];     
-                                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                                       printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
-                                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                                       printf("\n");
-                                   } /* else */
-                               } /* else */
-                           } /* if... */
-                       } /* while xdfs < maxdfs */
-                       /* krow has no more unexplored nbrs:
-                        *    Place snode-rep krep in postorder DFS, if this 
-                        *    segment is seen for the first time. (Note that
-                        *    "repfnz[krep]" may change later.)
-                        *    Backtrack dfs to its parent.
-                        */
-                       if ( marker1[krep] < jcol ) {
-                           segrep[*nseg] = krep;
-                           ++(*nseg);
-                           marker1[krep] = jj;
-                       }
-                       kpar = parent[krep]; /* Pop stack, mimic recursion */
-                       if ( kpar == EMPTY ) break; /* dfs done */
-                       krep = kpar;
-                       xdfs = xplore[krep];
-                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                       printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", krep,xdfs,maxdfs);
-                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                       printf("\n");
-                   } while ( kpar != EMPTY ); /* do-while - until empty stack */
-               } /* else */
-           } /* else */
-       } /* for each nonz in A[*,jj] */
-       repfnz_col += m;    /* Move to next column */
-        dense_col += m;
-    } /* for jj ... */
diff --git a/superlu/zpivotL.c b/superlu/zpivotL.c
deleted file mode 100644
index 484ebfa2..00000000
--- a/superlu/zpivotL.c
+++ /dev/null
@@ -1,171 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <math.h>
-#include <stdlib.h>
-#include "slu_zdefs.h"
-#undef DEBUG
-        const int  jcol,     /* in */
-        const double u,      /* in - diagonal pivoting threshold */
-        int        *usepr,   /* re-use the pivot sequence given by perm_r/iperm_r */
-        int        *perm_r,  /* may be modified */
-        int        *iperm_r, /* in - inverse of perm_r */
-        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
-        int        *pivrow,  /* out */
-        GlobalLU_t *Glu,     /* modified - global LU data structures */
-       SuperLUStat_t *stat  /* output */
-       )
- * Purpose
- * =======
- *   Performs the numerical pivoting on the current column of L,
- *   and the CDIV operation.
- *
- *   Pivot policy:
- *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
- *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
- *           pivot row = k;
- *       ELSE IF abs(A_jj) >= thresh THEN
- *           pivot row = j;
- *       ELSE
- *           pivot row = m;
- * 
- *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
- *
- *   Return value: 0      success;
- *                 i > 0  U(i,i) is exactly zero.
- *
- */
-    doublecomplex one = {1.0, 0.0};
-    int          fsupc;            /* first column in the supernode */
-    int          nsupc;            /* no of columns in the supernode */
-    int          nsupr;     /* no of rows in the supernode */
-    int          lptr;     /* points to the starting subscript of the supernode */
-    int          pivptr, old_pivptr, diag, diagind;
-    double       pivmax, rtemp, thresh;
-    doublecomplex       temp;
-    doublecomplex       *lu_sup_ptr; 
-    doublecomplex       *lu_col_ptr;
-    int          *lsub_ptr;
-    int          isub, icol, k, itemp;
-    int          *lsub, *xlsub;
-    doublecomplex       *lusup;
-    int          *xlusup;
-    flops_t      *ops = stat->ops;
-    /* Initialize pointers */
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
-    nsupc      = jcol - fsupc;         /* excluding jcol; nsupc >= 0 */
-    lptr       = xlsub[fsupc];
-    nsupr      = xlsub[fsupc+1] - lptr;
-    lu_sup_ptr = &lusup[xlusup[fsupc]];        /* start of the current supernode */
-    lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
-    lsub_ptr   = &lsub[lptr];  /* start of row indices of the supernode */
-#ifdef DEBUG
-if ( jcol == MIN_COL ) {
-    printf("Before cdiv: col %d\n", jcol);
-    for (k = nsupc; k < nsupr; k++) 
-       printf("  lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
-    /* Determine the largest abs numerical value for partial pivoting;
-       Also search for user-specified pivot, and diagonal element. */
-    if ( *usepr ) *pivrow = iperm_r[jcol];
-    diagind = iperm_c[jcol];
-    pivmax = 0.0;
-    pivptr = nsupc;
-    diag = EMPTY;
-    old_pivptr = nsupc;
-    for (isub = nsupc; isub < nsupr; ++isub) {
-        rtemp = z_abs1 (&lu_col_ptr[isub]);
-       if ( rtemp > pivmax ) {
-           pivmax = rtemp;
-           pivptr = isub;
-       }
-       if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
-       if ( lsub_ptr[isub] == diagind ) diag = isub;
-    }
-    /* Test for singularity */
-    if ( pivmax == 0.0 ) {
-       *pivrow = lsub_ptr[pivptr];
-       perm_r[*pivrow] = jcol;
-       *usepr = 0;
-       return (jcol+1);
-    }
-    thresh = u * pivmax;
-    /* Choose appropriate pivotal element by our policy. */
-    if ( *usepr ) {
-        rtemp = z_abs1 (&lu_col_ptr[old_pivptr]);
-       if ( rtemp != 0.0 && rtemp >= thresh )
-           pivptr = old_pivptr;
-       else
-           *usepr = 0;
-    }
-    if ( *usepr == 0 ) {
-       /* Use diagonal pivot? */
-       if ( diag >= 0 ) { /* diagonal exists */
-            rtemp = z_abs1 (&lu_col_ptr[diag]);
-           if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
-        }
-       *pivrow = lsub_ptr[pivptr];
-    }
-    /* Record pivot row */
-    perm_r[*pivrow] = jcol;
-    /* Interchange row subscripts */
-    if ( pivptr != nsupc ) {
-       itemp = lsub_ptr[pivptr];
-       lsub_ptr[pivptr] = lsub_ptr[nsupc];
-       lsub_ptr[nsupc] = itemp;
-       /* Interchange numerical values as well, for the whole snode, such 
-        * that L is indexed the same way as A.
-        */
-       for (icol = 0; icol <= nsupc; icol++) {
-           itemp = pivptr + icol * nsupr;
-           temp = lu_sup_ptr[itemp];
-           lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
-           lu_sup_ptr[nsupc + icol*nsupr] = temp;
-       }
-    } /* if */
-    /* cdiv operation */
-    ops[FACT] += 10 * (nsupr - nsupc);
-    z_div(&temp, &one, &lu_col_ptr[nsupc]);
-    for (k = nsupc+1; k < nsupr; k++) 
-       zz_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp);
-    return 0;
diff --git a/superlu/zpivotgrowth.c b/superlu/zpivotgrowth.c
deleted file mode 100644
index 1d598cf0..00000000
--- a/superlu/zpivotgrowth.c
+++ /dev/null
@@ -1,129 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <math.h>
-#include "slu_zdefs.h"
-zPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
-             SuperMatrix *L, SuperMatrix *U)
- * Purpose
- * =======
- *
- * Compute the reciprocal pivot growth factor of the leading ncols columns
- * of the matrix, using the formula:
- *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
- *
- * Arguments
- * =========
- *
- * ncols    (input) int
- *          The number of columns of matrices A, L and U.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = NC; Dtype = SLU_Z; Mtype = GE.
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SC; Dtype = SLU_Z; Mtype = TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = NC;
- *          Dtype = SLU_Z; Mtype = TRU.
- *
- */
-    NCformat *Astore;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    doublecomplex  *Aval, *Lval, *Uval;
-    int      fsupc, nsupr, luptr, nz_in_U;
-    int      i, j, k, oldcol;
-    int      *inv_perm_c;
-    double   rpg, maxaj, maxuj;
-    extern   double dlamch_(char *);
-    double   smlnum;
-    doublecomplex   *luval;
-    doublecomplex   temp_comp;
-    /* Get machine constants. */
-    smlnum = dlamch_("S");
-    rpg = 1. / smlnum;
-    Astore = A->Store;
-    Lstore = L->Store;
-    Ustore = U->Store;
-    Aval = Astore->nzval;
-    Lval = Lstore->nzval;
-    Uval = Ustore->nzval;
-    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
-    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;
-    for (k = 0; k <= Lstore->nsuper; ++k) {
-       fsupc = L_FST_SUPC(k);
-       nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-       luptr = L_NZ_START(fsupc);
-       luval = &Lval[luptr];
-       nz_in_U = 1;
-       for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
-           maxaj = 0.;
-            oldcol = inv_perm_c[j];
-           for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
-               maxaj = SUPERLU_MAX( maxaj, z_abs1( &Aval[i]) );
-           maxuj = 0.;
-           for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
-               maxuj = SUPERLU_MAX( maxuj, z_abs1( &Uval[i]) );
-           /* Supernode */
-           for (i = 0; i < nz_in_U; ++i)
-               maxuj = SUPERLU_MAX( maxuj, z_abs1( &luval[i]) );
-           ++nz_in_U;
-           luval += nsupr;
-           if ( maxuj == 0. )
-               rpg = SUPERLU_MIN( rpg, 1.);
-           else
-               rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
-       }
-       if ( j >= ncols ) break;
-    }
-    SUPERLU_FREE(inv_perm_c);
-    return (rpg);
diff --git a/superlu/zpruneL.c b/superlu/zpruneL.c
deleted file mode 100644
index 854038d4..00000000
--- a/superlu/zpruneL.c
+++ /dev/null
@@ -1,156 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_zdefs.h"
-       const int  jcol,             /* in */
-       const int  *perm_r,   /* in */
-       const int  pivrow,    /* in */
-       const int  nseg,             /* in */
-       const int  *segrep,   /* in */
-       const int  *repfnz,   /* in */
-       int        *xprune,   /* out */
-       GlobalLU_t *Glu       /* modified - global LU data structures */
-       )
- * Purpose
- * =======
- *   Prunes the L-structure of supernodes whose L-structure
- *   contains the current pivot row "pivrow"
- *
- */
-    doublecomplex     utemp;
-    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
-    int        i, ktemp, minloc, maxloc;
-    int        do_prune; /* logical variable */
-    int        *xsup, *supno;
-    int        *lsub, *xlsub;
-    doublecomplex     *lusup;
-    int        *xlusup;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    /*
-     * For each supernode-rep irep in U[*,j]
-     */
-    jsupno = supno[jcol];
-    for (i = 0; i < nseg; i++) {
-       irep = segrep[i];
-       irep1 = irep + 1;
-       do_prune = FALSE;
-       /* Don't prune with a zero U-segment */
-       if ( repfnz[irep] == EMPTY )
-               continue;
-       /* If a snode overlaps with the next panel, then the U-segment 
-        * is fragmented into two parts -- irep and irep1. We should let
-        * pruning occur at the rep-column in irep1's snode. 
-        */
-       if ( supno[irep] == supno[irep1] )      /* Don't prune */
-               continue;
-       /*
-        * If it has not been pruned & it has a nonz in row L[pivrow,i]
-        */
-       if ( supno[irep] != jsupno ) {
-           if ( xprune[irep] >= xlsub[irep1] ) {
-               kmin = xlsub[irep];
-               kmax = xlsub[irep1] - 1;
-               for (krow = kmin; krow <= kmax; krow++) 
-                   if ( lsub[krow] == pivrow ) {
-                       do_prune = TRUE;
-                       break;
-                   }
-           }
-           if ( do_prune ) {
-               /* Do a quicksort-type partition
-                * movnum=TRUE means that the num values have to be exchanged.
-                */
-               movnum = FALSE;
-               if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
-                       movnum = TRUE;
-               while ( kmin <= kmax ) {
-                   if ( perm_r[lsub[kmax]] == EMPTY ) 
-                       kmax--;
-                   else if ( perm_r[lsub[kmin]] != EMPTY )
-                       kmin++;
-                   else { /* kmin below pivrow, and kmax above pivrow: 
-                           *   interchange the two subscripts
-                           */
-                       ktemp = lsub[kmin];
-                       lsub[kmin] = lsub[kmax];
-                       lsub[kmax] = ktemp;
-                       /* If the supernode has only one column, then we
-                        * only keep one set of subscripts. For any subscript 
-                        * interchange performed, similar interchange must be 
-                        * done on the numerical values.
-                        */
-                       if ( movnum ) {
-                           minloc = xlusup[irep] + (kmin - xlsub[irep]);
-                           maxloc = xlusup[irep] + (kmax - xlsub[irep]);
-                           utemp = lusup[minloc];
-                           lusup[minloc] = lusup[maxloc];
-                           lusup[maxloc] = utemp;
-                       }
-                       kmin++;
-                       kmax--;
-                   }
-               } /* while */
-               xprune[irep] = kmin;    /* Pruning */
-#ifdef CHK_PRUNE
-       printf("    After zpruneL(),using col %d:  xprune[%d] = %d\n", 
-                       jcol, irep, kmin);
-           } /* if do_prune */
-       } /* if */
-    } /* for each U-segment... */
diff --git a/superlu/zreadhb.c b/superlu/zreadhb.c
deleted file mode 100644
index 0522a67a..00000000
--- a/superlu/zreadhb.c
+++ /dev/null
@@ -1,286 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_zdefs.h"
-/* Eat up the rest of the current line */
-int zDumpLine(FILE *fp)
-    register int c;
-    while ((c = fgetc(fp)) != '\n') ;
-    return 0;
-int zParseIntFormat(char *buf, int *num, int *size)
-    char *tmp;
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    sscanf(tmp, "%d", num);
-    while (*tmp != 'I' && *tmp != 'i') ++tmp;
-    ++tmp;
-    sscanf(tmp, "%d", size);
-    return 0;
-int zParseFloatFormat(char *buf, int *num, int *size)
-    char *tmp, *period;
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-    while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd'
-          && *tmp != 'F' && *tmp != 'f') {
-        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
-           num picked up refers to P, which should be skipped. */
-        if (*tmp=='p' || *tmp=='P') {
-           ++tmp;
-           *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-        } else {
-           ++tmp;
-        }
-    }
-    ++tmp;
-    period = tmp;
-    while (*period != '.' && *period != ')') ++period ;
-    *period = '\0';
-    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/
-    return 0;
-int zReadVector(FILE *fp, int n, int *where, int perline, int persize)
-    register int i, j, item;
-    char tmp, buf[100], *dummy;
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           item = atoi(&buf[j*persize]); 
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-           where[i++] = item - 1;
-       }
-    }
-    return 0;
-/* Read complex numbers as pairs of (real, imaginary) */
-int zReadValues(FILE *fp, int n, doublecomplex *destination, int perline, int persize)
-    register int i, j, k, s, pair;
-    register double realpart;
-    char tmp, buf[100], *dummy;
-    i = pair = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           s = j*persize;
-           for (k = 0; k < persize; ++k) /* No D_ format in C */
-               if ( buf[s+k] == 'D' || buf[s+k] == 'd' ) buf[s+k] = 'E';
-           if ( pair == 0 ) {
-               /* The value is real part */
-               realpart = atof(&buf[s]);
-               pair = 1;
-           } else {
-               /* The value is imaginary part */
-               destination[i].r = realpart;
-               destination[i++].i = atof(&buf[s]);
-               pair = 0;
-           }
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-       }
-    }
-    return 0;
-zreadhb(int *nrow, int *ncol, int *nonz,
-       doublecomplex **nzval, int **rowind, int **colptr)
- * Purpose
- * =======
- * 
- * Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format 
- * as described below.
- * 
- * Line 1 (A72,A8) 
- *     Col. 1 - 72   Title (TITLE) 
- *     Col. 73 - 80  Key (KEY) 
- * 
- * Line 2 (5I14) 
- *     Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
- *     Col. 15 - 28  Number of lines for pointers (PTRCRD) 
- *     Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
- *     Col. 43 - 56  Number of lines for numerical values (VALCRD) 
- *     Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
- *                    (including starting guesses and solution vectors 
- *                    if present) 
- *                   (zero indicates no right-hand side data is present) 
- *
- * Line 3 (A3, 11X, 4I14) 
- *     Col. 1 - 3    Matrix type (see below) (MXTYPE) 
- *     Col. 15 - 28  Number of rows (or variables) (NROW) 
- *     Col. 29 - 42  Number of columns (or elements) (NCOL) 
- *     Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
- *                   (equal to number of entries for assembled matrices) 
- *     Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
- *                   (zero in the case of assembled matrices) 
- * Line 4 (2A16, 2A20) 
- *     Col. 1 - 16   Format for pointers (PTRFMT) 
- *     Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
- *     Col. 33 - 52  Format for numerical values of coefficient matrix 
- *     Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
- *
- * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
- *     Col. 1        Right-hand side type: 
- *                       F for full storage or M for same format as matrix 
- *     Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
- *     Col. 3        X if an exact solution vector(s) is supplied. 
- *     Col. 15 - 28  Number of right-hand sides (NRHS) 
- *     Col. 29 - 42  Number of row indices (NRHSIX) 
- *                   (ignored in case of unassembled matrices) 
- *
- * The three character type field on line 3 describes the matrix type. 
- * The following table lists the permitted values for each of the three 
- * characters. As an example of the type field, RSA denotes that the matrix 
- * is real, symmetric, and assembled. 
- *
- * First Character: 
- *     R Real matrix 
- *     C Complex matrix 
- *     P Pattern only (no numerical values supplied) 
- *
- * Second Character: 
- *     S Symmetric 
- *     U Unsymmetric 
- *     H Hermitian 
- *     Z Skew symmetric 
- *     R Rectangular 
- *
- * Third Character: 
- *     A Assembled 
- *     E Elemental matrices (unassembled) 
- *
- */
-  register int i, numer_lines = 0, rhscrd = 0, dummy;
-    int tmp, colnum, colsize, rownum, rowsize, valnum, valsize;
-    char buf[100], type[4], key[10], *dummyc;
-    FILE *fp;
-    fp = stdin;
-    /* Line 1 */
-    dummyc = fgets(buf, 100, fp);
-    fputs(buf, stdout);
-#if 0
-    dummy = fscanf(fp, "%72c", buf); buf[72] = 0;
-    printf("Title: %s", buf);
-    dummy += fscanf(fp, "%8c", key);  key[8] = 0;
-    printf("Key: %s\n", key);
-    zDumpLine(fp);
-    /* Line 2 */
-    for (i=0; i<5; i++) {
-       dummy += fscanf(fp, "%14c", buf); buf[14] = 0;
-       sscanf(buf, "%d", &tmp);
-       if (i == 3) numer_lines = tmp;
-       if (i == 4 && tmp) rhscrd = tmp;
-    }
-    zDumpLine(fp);
-    /* Line 3 */
-    dummy += fscanf(fp, "%3c", type);
-    dummy += fscanf(fp, "%11c", buf); /* pad */
-    type[3] = 0;
-#ifdef DEBUG
-    printf("Matrix type %s\n", type);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nrow);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", ncol);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nonz);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", &tmp);
-    if (tmp != 0)
-         printf("This is not an assembled matrix!\n");
-    if (*nrow != *ncol)
-       printf("Matrix is not square.\n");
-    zDumpLine(fp);
-    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
-    zallocateA(*ncol, *nonz, nzval, rowind, colptr);
-    /* Line 4: format statement */
-    dummy += fscanf(fp, "%16c", buf);
-    zParseIntFormat(buf, &colnum, &colsize);
-    dummy += fscanf(fp, "%16c", buf);
-    zParseIntFormat(buf, &rownum, &rowsize);
-    dummy += fscanf(fp, "%20c", buf);
-    zParseFloatFormat(buf, &valnum, &valsize);
-    dummy += fscanf(fp, "%20c", buf);
-    zDumpLine(fp);
-    /* Line 5: right-hand side */    
-    if ( rhscrd ) zDumpLine(fp); /* skip RHSFMT */
-#ifdef DEBUG
-    printf("%d rows, %d nonzeros\n", *nrow, *nonz);
-    printf("colnum %d, colsize %d\n", colnum, colsize);
-    printf("rownum %d, rowsize %d\n", rownum, rowsize);
-    printf("valnum %d, valsize %d\n", valnum, valsize);
-    zReadVector(fp, *ncol+1, *colptr, colnum, colsize);
-    zReadVector(fp, *nonz, *rowind, rownum, rowsize);
-    if ( numer_lines ) {
-        zReadValues(fp, *nonz, *nzval, valnum, valsize);
-    }
-    fclose(fp);
diff --git a/superlu/zsnode_bmod.c b/superlu/zsnode_bmod.c
deleted file mode 100644
index e10da825..00000000
--- a/superlu/zsnode_bmod.c
+++ /dev/null
@@ -1,129 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_zdefs.h"
-extern void ztrsv_();
-extern void zgemv_();
- * Performs numeric block updates within the relaxed snode. 
- */
-zsnode_bmod (
-           const int  jcol,      /* in */
-           const int  jsupno,    /* in */
-           const int  fsupc,     /* in */
-           doublecomplex     *dense,    /* in */
-           doublecomplex     *tempv,    /* working array */
-           GlobalLU_t *Glu,      /* modified */
-           SuperLUStat_t *stat   /* output */
-           )
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-    int            incx = 1, incy = 1;
-    doublecomplex         alpha = {-1.0, 0.0},  beta = {1.0, 0.0};
-    doublecomplex   comp_zero = {0.0, 0.0};
-    int            luptr, nsupc, nsupr, nrow;
-    int            isub, irow, i, iptr; 
-    register int   ufirst, nextlu;
-    int            *lsub, *xlsub;
-    doublecomplex         *lusup;
-    int            *xlusup;
-    flops_t *ops = stat->ops;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nextlu = xlusup[jcol];
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = comp_zero;
-       ++nextlu;
-    }
-    xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
-    if ( fsupc < jcol ) {
-       luptr = xlusup[fsupc];
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nsupc = jcol - fsupc;   /* Excluding jcol */
-       ufirst = xlusup[jcol];  /* Points to the beginning of column
-                                  jcol in supernode L\U(jsupno). */
-       nrow = nsupr - nsupc;
-       ops[TRSV] += 4 * nsupc * (nsupc - 1);
-       ops[GEMV] += 8 * nrow * nsupc;
-#ifdef _CRAY
-       CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       ztrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       zgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-       zlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       zmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
-                       &lusup[ufirst], &tempv[0] );
-        /* Scatter tempv[*] into lusup[*] */
-       iptr = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           z_sub(&lusup[iptr], &lusup[iptr], &tempv[i]);
-            ++iptr;
-           tempv[i] = comp_zero;
-       }
-    }
-    return 0;
diff --git a/superlu/zsnode_dfs.c b/superlu/zsnode_dfs.c
deleted file mode 100644
index c860a6fb..00000000
--- a/superlu/zsnode_dfs.c
+++ /dev/null
@@ -1,113 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-#include "slu_zdefs.h"
-zsnode_dfs (
-          const int  jcol,         /* in - start of the supernode */
-          const int  kcol,         /* in - end of the supernode */
-          const int  *asub,        /* in */
-          const int  *xa_begin,    /* in */
-          const int  *xa_end,      /* in */
-          int        *xprune,      /* out */
-          int        *marker,      /* modified */
-          GlobalLU_t *Glu          /* modified */
-          )
-/* Purpose
- * =======
- *    zsnode_dfs() - Determine the union of the row structures of those 
- *    columns within the relaxed snode.
- *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
- *    the portion outside the rectangular supernode must be zero.
- *
- * Return value
- * ============
- *     0   success;
- *    >0   number of bytes allocated when run out of memory.
- *
- */
-    register int i, k, ifrom, ito, nextl, new_next;
-    int          nsuper, krow, kmark, mem_error;
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    int          nzlmax;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-    nsuper = ++supno[jcol];    /* Next available supernode number */
-    nextl = xlsub[jcol];
-    for (i = jcol; i <= kcol; i++) {
-       /* For each nonzero in A[*,i] */
-       for (k = xa_begin[i]; k < xa_end[i]; k++) {     
-           krow = asub[k];
-           kmark = marker[krow];
-           if ( kmark != kcol ) { /* First time visit krow */
-               marker[krow] = kcol;
-               lsub[nextl++] = krow;
-               if ( nextl >= nzlmax ) {
-                   if ( mem_error = zLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                       return (mem_error);
-                   lsub = Glu->lsub;
-               }
-           }
-       }
-       supno[i] = nsuper;
-    }
-    /* Supernode > 1, then make a copy of the subscripts for pruning */
-    if ( jcol < kcol ) {
-       new_next = nextl + (nextl - xlsub[jcol]);
-       while ( new_next > nzlmax ) {
-           if ( mem_error = zLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-               return (mem_error);
-           lsub = Glu->lsub;
-       }
-       ito = nextl;
-       for (ifrom = xlsub[jcol]; ifrom < nextl; )
-           lsub[ito++] = lsub[ifrom++];        
-        for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
-       nextl = ito;
-    }
-    xsup[nsuper+1] = kcol + 1;
-    supno[kcol+1]  = nsuper;
-    xprune[kcol]   = nextl;
-    xlsub[kcol+1]  = nextl;
-    return 0;
diff --git a/superlu/zsp_blas2.c b/superlu/zsp_blas2.c
deleted file mode 100644
index 525f753d..00000000
--- a/superlu/zsp_blas2.c
+++ /dev/null
@@ -1,576 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
- * File name:          zsp_blas2.c
- * Purpose:            Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-#include "slu_zdefs.h"
-extern void ztrsv_();
-extern void zgemv_();
- * Function prototypes 
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, doublecomplex *x, SuperLUStat_t *stat, int *info)
- *   Purpose
- *   =======
- *
- *   sp_ztrsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.
- *                trans = 'C' or 'c'   A^H*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *          
- *   L       - (input) SuperMatrix*
- *            The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_Z, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *             The factor U from the factorization Pr*A*Pc=L*U.
- *             U has types: Stype = NC, Dtype = SLU_Z, Mtype = TRU.
- *    
- *   x       - (input/output) doublecomplex*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-    SCformat *Lstore;
-    NCformat *Ustore;
-    doublecomplex   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    doublecomplex temp;
-    doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    doublecomplex comp_zero = {0.0, 0.0};
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    doublecomplex *work;
-    flops_t solve_ops;
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
-              !lsame_(trans, "C")) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sp_ztrsv", &i);
-       return 0;
-    }
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    if ( !(work = doublecomplexCalloc(L->nrow)) )
-       ABORT("Malloc fails for work in sp_ztrsv().");
-    if ( lsame_(trans, "N") ) {        /* Form x := inv(A)*x. */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L)*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               nrow = nsupr - nsupc;
-                /* 1 z_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
-               solve_ops += 8 * nrow * nsupc;
-               if ( nsupc == 1 ) {
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
-                       z_sub(&x[irow], &x[irow], &comp_zero);
-                   }
-               } else {
-#ifdef _CRAY
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-                   ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-                   zlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-                   zmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; ++i, ++iptr) {
-                       irow = L_SUB(iptr);
-                       z_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
-                       work[i] = comp_zero;
-                   }
-               }
-           } /* for k ... */
-       } else {
-           /* Form x := inv(U)*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; k--) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-                /* 1 z_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-               if ( nsupc == 1 ) {
-                   z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-                   for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-                       irow = U_SUB(i);
-                       zz_mult(&comp_zero, &x[fsupc], &Uval[i]);
-                       z_sub(&x[irow], &x[irow], &comp_zero);
-                   }
-               } else {
-#ifdef _CRAY
-                   CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-                      &x[fsupc], &incx);
-                   ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                   zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-                   for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                       solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                       for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-                               i++) {
-                           irow = U_SUB(i);
-                       zz_mult(&comp_zero, &x[jcol], &Uval[i]);
-                       z_sub(&x[irow], &x[irow], &comp_zero);
-                       }
-                    }
-               }
-           } /* for k ... */
-       }
-    } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L')*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               solve_ops += 8 * (nsupr - nsupc) * nsupc;
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                       zz_mult(&comp_zero, &x[irow], &Lval[i]);
-                       z_sub(&x[jcol], &x[jcol], &comp_zero);
-                       iptr++;
-                   }
-               }
-               if ( nsupc > 1 ) {
-                   solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                   ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               }
-           }
-       } else {
-           /* Form x := inv(U')*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                       zz_mult(&comp_zero, &x[irow], &Uval[i]);
-                       z_sub(&x[jcol], &x[jcol], &comp_zero);
-                   }
-               }
-                /* 1 z_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-               if ( nsupc == 1 ) {
-                   z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                   ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-               }
-           } /* for k ... */
-       }
-    } else { /* Form x := conj(inv(A'))*x */
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := conj(inv(L'))*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               solve_ops += 8 * (nsupr - nsupc) * nsupc;
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                        zz_conj(&temp, &Lval[i]);
-                       zz_mult(&comp_zero, &x[irow], &temp);
-                       z_sub(&x[jcol], &x[jcol], &comp_zero);
-                       iptr++;
-                   }
-               }
-               if ( nsupc > 1 ) {
-                   solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd(trans, strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   ZTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-                    ztrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-               }
-           }
-       } else {
-           /* Form x := conj(inv(U'))*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                        zz_conj(&temp, &Uval[i]);
-                       zz_mult(&comp_zero, &x[irow], &temp);
-                       z_sub(&x[jcol], &x[jcol], &comp_zero);
-                   }
-               }
-                /* 1 z_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-               if ( nsupc == 1 ) {
-                    zz_conj(&temp, &Lval[luptr]);
-                   z_div(&x[fsupc], &x[fsupc], &temp);
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd(trans, strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   ZTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-                    ztrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,
-                               &x[fsupc], &incx);
-               }
-           } /* for k ... */
-       }
-    }
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x, 
-        int incx, doublecomplex beta, doublecomplex *y, int incy)
-/*  Purpose   
-    =======   
-    sp_zgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-    Parameters   
-    ==========   
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-    ALPHA  - (input) doublecomplex
-             On entry, ALPHA specifies the scalar alpha.   
-    A      - (input) SuperMatrix*
-             Before entry, the leading m by n part of the array A must   
-             contain the matrix of coefficients.   
-    X      - (input) doublecomplex*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-    BETA   - (input) doublecomplex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-    Y      - (output) doublecomplex*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-    ==== Sparse Level 2 Blas routine.   
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    int info;
-    doublecomplex temp, temp1;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-    doublecomplex comp_zero = {0.0, 0.0};
-    doublecomplex comp_one = {1.0, 0.0};
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)        info = 8;
-    if (info != 0) {
-       xerbla_("sp_zgemv ", &info);
-       return 0;
-    }
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || 
-       z_eq(&alpha, &comp_zero) && 
-       z_eq(&beta, &comp_one))
-       return 0;
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-       lenx = A->ncol;
-       leny = A->nrow;
-    } else {
-       lenx = A->nrow;
-       leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if ( !z_eq(&beta, &comp_one) ) {
-       if (incy == 1) {
-           if ( z_eq(&beta, &comp_zero) )
-               for (i = 0; i < leny; ++i) y[i] = comp_zero;
-           else
-               for (i = 0; i < leny; ++i) 
-                 zz_mult(&y[i], &beta, &y[i]);
-       } else {
-           iy = ky;
-           if ( z_eq(&beta, &comp_zero) )
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = comp_zero;
-                   iy += incy;
-               }
-           else
-               for (i = 0; i < leny; ++i) {
-                   zz_mult(&y[iy], &beta, &y[iy]);
-                   iy += incy;
-               }
-       }
-    }
-    if ( z_eq(&alpha, &comp_zero) ) return 0;
-    if ( notran ) {
-       /* Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (incy == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               if ( !z_eq(&x[jx], &comp_zero) ) {
-                   zz_mult(&temp, &alpha, &x[jx]);
-                   for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       zz_mult(&temp1, &temp,  &Aval[i]);
-                       z_add(&y[irow], &y[irow], &temp1);
-                   }
-               }
-               jx += incx;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    } else {
-       /* Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (incx == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               temp = comp_zero;
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   zz_mult(&temp1, &Aval[i], &x[irow]);
-                   z_add(&temp, &temp, &temp1);
-               }
-               zz_mult(&temp1, &alpha, &temp);
-               z_add(&y[jy], &y[jy], &temp1);
-               jy += incy;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    }
-    return 0;    
-} /* sp_zgemv */
diff --git a/superlu/zsp_blas3.c b/superlu/zsp_blas3.c
deleted file mode 100644
index 7a815e2f..00000000
--- a/superlu/zsp_blas3.c
+++ /dev/null
@@ -1,140 +0,0 @@
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
- * File name:          sp_blas3.c
- * Purpose:            Sparse BLAS3, using some dense BLAS3 operations.
- */
-#include "slu_zdefs.h"
-sp_zgemm(char *transa, char *transb, int m, int n, int k, 
-         doublecomplex alpha, SuperMatrix *A, doublecomplex *b, int ldb, 
-         doublecomplex beta, doublecomplex *c, int ldc)
-/*  Purpose   
-    =======   
-    sp_z performs one of the matrix-matrix operations   
-       C := alpha*op( A )*op( B ) + beta*C,   
-    where  op( X ) is one of 
-       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
-    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
-    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
-    Parameters   
-    ==========   
-    TRANSA - (input) char*
-             On entry, TRANSA specifies the form of op( A ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSA = 'N' or 'n',  op( A ) = A.   
-                TRANSA = 'T' or 't',  op( A ) = A'.   
-                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
-             Unchanged on exit.   
-    TRANSB - (input) char*
-             On entry, TRANSB specifies the form of op( B ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSB = 'N' or 'n',  op( B ) = B.   
-                TRANSB = 'T' or 't',  op( B ) = B'.   
-                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
-             Unchanged on exit.   
-    M      - (input) int   
-             On entry,  M  specifies  the number of rows of the matrix 
-            op( A ) and of the matrix C.  M must be at least zero. 
-            Unchanged on exit.   
-    N      - (input) int
-             On entry,  N specifies the number of columns of the matrix 
-            op( B ) and the number of columns of the matrix C. N must be 
-            at least zero.
-            Unchanged on exit.   
-    K      - (input) int
-             On entry, K specifies the number of columns of the matrix 
-            op( A ) and the number of rows of the matrix op( B ). K must 
-            be at least  zero.   
-             Unchanged on exit.
-    ALPHA  - (input) doublecomplex
-             On entry, ALPHA specifies the scalar alpha.   
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_Z; Mtype = GE. 
-             In the future, more general A can be handled.
-    B      - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb 
-             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
-             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
-             part of the array B must contain the matrix B, otherwise 
-             the leading n by k part of the array B must contain the 
-             matrix B.   
-             Unchanged on exit.   
-    LDB    - (input) int
-             On entry, LDB specifies the first dimension of B as declared 
-             in the calling (sub) program. LDB must be at least max( 1, n ).  
-             Unchanged on exit.   
-    BETA   - (input) doublecomplex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then C need not be set on input.   
-    C      - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDC, n ).   
-             Before entry, the leading m by n part of the array C must 
-             contain the matrix C,  except when beta is zero, in which 
-             case C need not be set on entry.   
-             On exit, the array C is overwritten by the m by n matrix 
-            ( alpha*op( A )*B + beta*C ).   
-    LDC    - (input) int
-             On entry, LDC specifies the first dimension of C as declared 
-             in the calling (sub)program. LDC must be at least max(1,m).   
-             Unchanged on exit.   
-    ==== Sparse Level 3 Blas routine.   
-    int    incx = 1, incy = 1;
-    int    j;
-    for (j = 0; j < n; ++j) {
-       sp_zgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
-    }
-    return 0;    
diff --git a/superlu/zutil.c b/superlu/zutil.c
deleted file mode 100644
index 30bcca70..00000000
--- a/superlu/zutil.c
+++ /dev/null
@@ -1,482 +0,0 @@
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-All rights reserved. 
-The source code is distributed under BSD license, see the file License.txt
-#include <math.h>
-#include "slu_zdefs.h"
-zCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      doublecomplex *nzval, int *rowind, int *colptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    NCformat *Astore;
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->rowind = rowind;
-    Astore->colptr = colptr;
-zCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      doublecomplex *nzval, int *colind, int *rowptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    NRformat *Astore;
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->colind = colind;
-    Astore->rowptr = rowptr;
-/* Copy matrix A into matrix B. */
-zCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
-    NCformat *Astore, *Bstore;
-    int      ncol, nnz, i;
-    B->Stype = A->Stype;
-    B->Dtype = A->Dtype;
-    B->Mtype = A->Mtype;
-    B->nrow  = A->nrow;;
-    B->ncol  = ncol = A->ncol;
-    Astore   = (NCformat *) A->Store;
-    Bstore   = (NCformat *) B->Store;
-    Bstore->nnz = nnz = Astore->nnz;
-    for (i = 0; i < nnz; ++i)
-       ((doublecomplex *)Bstore->nzval)[i] = ((doublecomplex *)Astore->nzval)[i];
-    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
-    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
-zCreate_Dense_Matrix(SuperMatrix *X, int m, int n, doublecomplex *x, int ldx,
-                   Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    DNformat    *Xstore;
-    X->Stype = stype;
-    X->Dtype = dtype;
-    X->Mtype = mtype;
-    X->nrow = m;
-    X->ncol = n;
-    X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
-    Xstore = (DNformat *) X->Store;
-    Xstore->lda = ldx;
-    Xstore->nzval = (doublecomplex *) x;
-zCopy_Dense_Matrix(int M, int N, doublecomplex *X, int ldx,
-                       doublecomplex *Y, int ldy)
- *
- *  Purpose
- *  =======
- *
- *  Copies a two-dimensional matrix X to another matrix Y.
- */
-    int    i, j;
-    for (j = 0; j < N; ++j)
-        for (i = 0; i < M; ++i)
-            Y[i + j*ldy] = X[i + j*ldx];
-zCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
-                       doublecomplex *nzval, int *nzval_colptr, int *rowind,
-                       int *rowind_colptr, int *col_to_sup, int *sup_to_col,
-                       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-    SCformat *Lstore;
-    L->Stype = stype;
-    L->Dtype = dtype;
-    L->Mtype = mtype;
-    L->nrow = m;
-    L->ncol = n;
-    L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
-    if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
-    Lstore = L->Store;
-    Lstore->nnz = nnz;
-    Lstore->nsuper = col_to_sup[n];
-    Lstore->nzval = nzval;
-    Lstore->nzval_colptr = nzval_colptr;
-    Lstore->rowind = rowind;
-    Lstore->rowind_colptr = rowind_colptr;
-    Lstore->col_to_sup = col_to_sup;
-    Lstore->sup_to_col = sup_to_col;
- * Convert a row compressed storage into a column compressed storage.
- */
-zCompRow_to_CompCol(int m, int n, int nnz, 
-                   doublecomplex *a, int *colind, int *rowptr,
-                   doublecomplex **at, int **rowind, int **colptr)
-    register int i, j, col, relpos;
-    int *marker;
-    /* Allocate storage for another copy of the matrix. */
-    *at = (doublecomplex *) doublecomplexMalloc(nnz);
-    *rowind = (int *) intMalloc(nnz);
-    *colptr = (int *) intMalloc(n+1);
-    marker = (int *) intCalloc(n);
-    /* Get counts of each column of A, and set up column pointers */
-    for (i = 0; i < m; ++i)
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
-    (*colptr)[0] = 0;
-    for (j = 0; j < n; ++j) {
-       (*colptr)[j+1] = (*colptr)[j] + marker[j];
-       marker[j] = (*colptr)[j];
-    }
-    /* Transfer the matrix into the compressed column storage. */
-    for (i = 0; i < m; ++i) {
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
-           col = colind[j];
-           relpos = marker[col];
-           (*rowind)[relpos] = i;
-           (*at)[relpos] = a[j];
-           ++marker[col];
-       }
-    }
-    SUPERLU_FREE(marker);
-zPrint_CompCol_Matrix(char *what, SuperMatrix *A)
-    NCformat     *Astore;
-    register int i,n;
-    double       *dp;
-    printf("\nCompCol matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (NCformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
-    printf("nzval: ");
-    for (i = 0; i < 2*Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
-    printf("\ncolptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
-    printf("\n");
-    fflush(stdout);
-zPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
-    SCformat     *Astore;
-    register int i, j, k, c, d, n, nsup;
-    double       *dp;
-    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
-    printf("\nSuperNode matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (SCformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    col_to_sup = Astore->col_to_sup;
-    sup_to_col = Astore->sup_to_col;
-    rowind_colptr = Astore->rowind_colptr;
-    rowind = Astore->rowind;
-    printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 
-          A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
-    printf("nzval:\n");
-    for (k = 0; k <= Astore->nsuper; ++k) {
-      c = sup_to_col[k];
-      nsup = sup_to_col[k+1] - c;
-      for (j = c; j < c + nsup; ++j) {
-       d = Astore->nzval_colptr[j];
-       for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-         printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]);
-          d += 2;      
-       }
-      }
-    }
-#if 0
-    for (i = 0; i < 2*Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nnzval_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->rowind_colptr[n]; ++i) 
-        printf("%d  ", Astore->rowind[i]);
-    printf("\nrowind_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
-    printf("\ncol_to_sup: ");
-    for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
-    printf("\nsup_to_col: ");
-    for (i = 0; i <= Astore->nsuper+1; ++i) 
-        printf("%d  ", sup_to_col[i]);
-    printf("\n");
-    fflush(stdout);
-zPrint_Dense_Matrix(char *what, SuperMatrix *A)
-    DNformat     *Astore;
-    register int i, j, lda = Astore->lda;
-    double       *dp;
-    printf("\nDense matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    Astore = (DNformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
-    printf("\nnzval: ");
-    for (j = 0; j < A->ncol; ++j) {
-        for (i = 0; i < 2*A->nrow; ++i) printf("%f  ", dp[i + j*2*lda]);
-        printf("\n");
-    }
-    printf("\n");
-    fflush(stdout);
- * Diagnostic print of column "jcol" in the U/L factor.
- */
-zprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
-    int     i, k, fsupc;
-    int     *xsup, *supno;
-    int     *xlsub, *lsub;
-    doublecomplex  *lusup;
-    int     *xlusup;
-    doublecomplex  *ucol;
-    int     *usub, *xusub;
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    printf("%s", msg);
-    printf("col %d: pivrow %d, supno %d, xprune %d\n", 
-          jcol, pivrow, supno[jcol], xprune[jcol]);
-    printf("\tU-col:\n");
-    for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
-       printf("\t%d%10.4f, %10.4f\n", usub[i], ucol[i].r, ucol[i].i);
-    printf("\tL-col in rectangular snode:\n");
-    fsupc = xsup[supno[jcol]]; /* first col of the snode */
-    i = xlsub[fsupc];
-    k = xlusup[jcol];
-    while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
-       printf("\t%d\t%10.4f, %10.4f\n", lsub[i], lusup[k].r, lusup[k].i);
-       i++; k++;
-    }
-    fflush(stdout);
- * Check whether tempv[] == 0. This should be true before and after 
- * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
- */
-void zcheck_tempv(int n, doublecomplex *tempv)
-    int i;
-    for (i = 0; i < n; i++) {
-       if ((tempv[i].r != 0.0) || (tempv[i].i != 0.0))
-       {
-           fprintf(stderr,"tempv[%d] = {%f, %f}\n", i, tempv[i].r, tempv[i].i);
-           ABORT("zcheck_tempv");
-       }
-    }
-zGenXtrue(int n, int nrhs, doublecomplex *x, int ldx)
-    int  i, j;
-    for (j = 0; j < nrhs; ++j)
-       for (i = 0; i < n; ++i) {
-           x[i + j*ldx].r = 1.0;
-           x[i + j*ldx].i = 0.0;
-       }
- * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
- */
-zFillRHS(trans_t trans, int nrhs, doublecomplex *x, int ldx,
-         SuperMatrix *A, SuperMatrix *B)
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    DNformat *Bstore;
-    doublecomplex   *rhs;
-    doublecomplex one = {1.0, 0.0};
-    doublecomplex zero = {0.0, 0.0};
-    int      ldc;
-    char transc[1];
-    Astore = A->Store;
-    Aval   = (doublecomplex *) Astore->nzval;
-    Bstore = B->Store;
-    rhs    = Bstore->nzval;
-    ldc    = Bstore->lda;
-    if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
-    else *(unsigned char *)transc = 'T';
-    sp_zgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
-            x, ldx, zero, rhs, ldc);
- * Fills a doublecomplex precision array with a given value.
- */
-zfill(doublecomplex *a, int alen, doublecomplex dval)
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = dval;
- * Check the inf-norm of the error vector 
- */
-void zinf_norm_error(int nrhs, SuperMatrix *X, doublecomplex *xtrue)
-    DNformat *Xstore;
-    double err, xnorm;
-    doublecomplex *Xmat, *soln_work;
-    doublecomplex temp;
-    int i, j;
-    Xstore = X->Store;
-    Xmat = Xstore->nzval;
-    for (j = 0; j < nrhs; j++) {
-      soln_work = &Xmat[j*Xstore->lda];
-      err = xnorm = 0.0;
-      for (i = 0; i < X->nrow; i++) {
-        z_sub(&temp, &soln_work[i], &xtrue[i]);
-       err = SUPERLU_MAX(err, z_abs(&temp));
-       xnorm = SUPERLU_MAX(xnorm, z_abs(&soln_work[i]));
-      }
-      err = err / xnorm;
-      printf("||X - Xtrue||/||X|| = %e\n", err);
-    }
-/* Print performance of the code. */
-zPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
-           double rpg, double rcond, double *ferr,
-           double *berr, char *equed, SuperLUStat_t *stat)
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *utime;
-    flops_t  *ops;
-    utime = stat->utime;
-    ops   = stat->ops;
-    if ( utime[FACT] != 0. )
-       printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-              ops[FACT]*1e-6/utime[FACT]);
-    printf("Identify relaxed snodes    = %8.2f\n", utime[RELAX]);
-    if ( utime[SOLVE] != 0. )
-       printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
-              ops[SOLVE]*1e-6/utime[SOLVE]);
-    Lstore = (SCformat *) L->Store;
-    Ustore = (NCformat *) U->Store;
-    printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
-    printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
-    printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-          mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
-          mem_usage->expansions);
-    printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
-    printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
-          utime[FACT], ops[FACT]*1e-6/utime[FACT],
-          utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
-          utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
-    printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
-    printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
-          rpg, rcond, ferr[0], berr[0], equed);
-int print_doublecomplex_vec(char *what, int n, doublecomplex *vec)
-    int i;
-    printf("%s: n %d\n", what, n);
-    for (i = 0; i < n; ++i) printf("%d\t%f%f\n", i, vec[i].r, vec[i].i);
-    return 0;
diff --git a/tests/ b/tests/
index a8333043..a4450410 100644
--- a/tests/
+++ b/tests/
@@ -35,7 +35,6 @@
 #include "getfem/getfem_export.h"
 #include "getfem/getfem_regular_meshes.h"
 #include "getfem/getfem_derivatives.h"
-#include "getfem/getfem_superlu.h"
 #include "gmm/gmm.h"
 using std::endl; using std::cout; using std::cerr;
 using std::ends; using std::cin;
@@ -284,7 +283,9 @@ bool laplacian_problem::solve(void) {
     gmm::gmres(SM, U, B, P, 50, iter);
   } else {
     double rcond;
+#if defined(GMM_USES_SUPERLU)
     gmm::SuperLU_solve(SM, U, B, rcond);
     cout << "cond = " << 1/rcond << "\n";
diff --git a/tests/ b/tests/
index 1afaf42a..f469c5da 100755
--- a/tests/
+++ b/tests/
@@ -23,7 +23,6 @@ sub numerique { $a <=> $b; }
 $nb_iter = 1;                # number of iterations on each test
 $islocal = 0;
 $with_qd = 0;                # test also with dd_real and qd_real
-$with_lapack = 0;            # link with lapack
 $srcdir = $ENV{srcdir};      # source directory
 $tests_to_be_done = "";
 $fix_base_type = -1;
@@ -37,9 +36,6 @@ while(@ARGV) {               # read optional parameters
   elsif ($param eq "with-qd") {
     $with_qd = 1;
-  elsif ($param eq "with-lapack") {
-    $with_lapack = 1;
-  }
   elsif ($param eq "float") {
     $fix_base_type = 0;
@@ -75,7 +71,6 @@ while(@ARGV) {               # read optional parameters
     print "valid parameters are:\n";
     print ". the number of iterations on each test\n";
     print ". with-qd : test also with dd_real and qd_real\n";
-    print ". with-lapack : link with lapack\n";
     print ". double, float, complex_double or complex_float";
     print " to fix the base type\n";
     print ". source name of a test procedure\n";
@@ -94,11 +89,6 @@ if ($tests_to_be_done eq "") {
   $tests_to_be_done = `ls $srcdir/gmm_torture*.cc`;  # list of tests
-if ($with_qd && $with_lapack) {
-  print "Options with_qd and with_lapack are not compatible\n";
-  exit(1);
 $nb_test = 0;                # number of test procedures
 $tests_list = $tests_to_be_done;
 while ($tests_list)
@@ -116,7 +106,6 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
     if ($nb_iter == 1) { print "Testing  $org_name"; }
     else { print "Test $iter for $org_name"; }
-    if ($with_lapack) { print " linked with lapack"; }
     if ($with_qd) { print " with qd types"; }
     print "\n";
@@ -131,14 +120,12 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
     print TMPF "\n\n";
-    if ($with_lapack) {
-      print TMPF "#include<gmm_lapack_interface.h>\n\n";
-    }
     if ($with_qd) {
       print TMPF "#include <qd/dd.h>\n";
       print TMPF "#include <qd/qd.h>\n";
       print TMPF "#include <qd/fpu.h>\n\n";
+    } else {
+      print TMPF "#include <gmm/gmm_lapack_interface.h>\n\n";
     $reading_param = 1;
@@ -165,10 +152,6 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
     # $TYPES[5] = "std::complex<long double> ";
     $NB_TYPES = 4.0;
-    if ($with_lapack) {
-      $NB_TYPES = 4.0;
-    }
     if ($with_qd) {
       $TYPES[0] = "dd_real";
       $TYPES[1] = "qd_real";
@@ -214,14 +197,14 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
     print TMPF "    try {\n\n";
     for ($j = 0; $j < $nb_param; ++$j) {
       $a = rand(); $b = rand();
-      if ($with_lapack) { $a = $b = 1.0; }
+      #$a = $b = 1.0;
       $sizepp = $sizep + int(50.0*rand());
       $step = $sizep; if ($step == 0) { ++$step; }
       $step = int(1.0*int($sizepp/$step - 1)*rand()) + 1;
       if (($param[$j] == 1) || ($param[$j] == 2)) { # vectors
        $lt = $VECTOR_TYPES[0];
-       if ($param[$j] == 2 && $with_lapack==0) {
+       if ($param[$j] == 2) {
          $lt = $VECTOR_TYPES[int($NB_VECTOR_TYPES * rand())];
        if ($a < 0.1) {
@@ -265,9 +248,7 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
        $sn = $s; if ($b < 0.3) { $sn = $s + int(50.0*rand()); }
        $param_name[$j] = "param$j";
        $lt = $MATRIX_TYPES[0];
-       if ($with_lapack==0) {
-         $lt = $MATRIX_TYPES[int($NB_MATRIX_TYPES * rand())];
-       }
+        $lt = $MATRIX_TYPES[int($NB_MATRIX_TYPES * rand())];
        $li = "      $lt param$j($sm, $sn);";
        if ($a < 0.3 || $b < 0.3) {
@@ -356,12 +337,7 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
     $compile_options=`sh ../gmm-config --build-flags`;
     $compile_options="$compile_options -I$srcdir/../src -I$srcdir/../include -I../src -I../include";
-    $compile_libs="-lm";
-    if ($with_lapack) {
-      $compile_libs="-llapack -lblas -lg2c $compile_libs";
-      $compile_options="$compile_options -DGMM_USES_LAPACK"
-    }
+    $compile_libs=`sh ../gmm-config --libs`;
     if ($with_qd) { $compile_libs="-lqd $compile_libs"; }
 #   print "$compilo $compile_options $dest_name -o $root_name $compile_libs\n";
     print `$compilo $compile_options $dest_name -o $root_name $compile_libs`;
diff --git a/tests/ b/tests/
index 47f0d481..9f0d8ec8 100644
--- a/tests/
+++ b/tests/
@@ -26,15 +26,14 @@
 /*                                                                        */
 #include "getfem/getfem_assembling.h"
 #include "getfem/getfem_regular_meshes.h"
 #include "getfem/getfem_export.h"
 #include "gmm/gmm.h"
-#ifdef GMM_USES_MPI
+#if defined(GMM_USES_MPI)
 #include <mpi.h>
 using std::endl; using std::cout; using std::cerr;
 using std::ends; using std::cin;
@@ -77,7 +76,7 @@ struct pb_data {
   int solve_cg();
   int solve_cg2();
+#if defined(GMM_USES_SUPERLU)
   int solve_superlu();
   int solve_schwarz(int);
@@ -87,7 +86,7 @@ struct pb_data {
     switch (solver) {
     case 0 : return solve_cg();
     case 1 : return solve_cg2();
+#if defined(GMM_USES_SUPERLU)
     case 2 : return solve_superlu();
     default : return solve_schwarz(solver);
@@ -105,7 +104,7 @@ void pb_data::init(bgeot::md_param &params) {
   /*  READING PARAMETER FILE.                                            */
   /* parametres physiques */
   N = int(params.int_value("N", "Dimension"));
   mu = params.real_value("MU", "Stiffness parameter mu");
@@ -114,7 +113,7 @@ void pb_data::init(bgeot::md_param &params) {
   lambda = params.real_value("LAMBDA", "lambda");
   D.resize(N); gmm::clear(D);
   D[N-1] = params.real_value("D", "Dirichlet condition");
   /* parametres numeriques */
   LX = params.real_value("LX", "Size in X");
   LY = params.real_value("LY", "Size in Y");
@@ -126,9 +125,9 @@ void pb_data::init(bgeot::md_param &params) {
   overlap = params.real_value("OVERLAP", "overlap");
   K = int(params.int_value("K", "Degree"));
   solver = int(params.int_value("SOLVER", "solver"));
-  subdomsize = params.real_value("SUBDOMSIZE", "sub-domains size");  
+  subdomsize = params.real_value("SUBDOMSIZE", "sub-domains size");
   std::string meshname(params.string_value("MESHNAME",
-                            "mesh file name"));
+                       "mesh file name"));
   std::cout << "\n\n";
@@ -144,38 +143,38 @@ void pb_data::init(bgeot::md_param &params) {
     base_node org(N); gmm::clear(org);
     std::vector<bgeot::base_small_vector> vtab(N);
     std::vector<size_type> ref(N); std::fill(ref.begin(), ref.end(), NX);
-    for (int i = 0; i < N; i++) { 
+    for (int i = 0; i < N; i++) {
       vtab[i] = bgeot::base_small_vector(N); gmm::clear(vtab[i]);
       (vtab[i])[i] = ((i == 0) ? LX : ((i == 1) ? LY : LZ)) / scalar_type(NX);
     getfem::parallelepiped_regular_simplex_mesh(mesh, dim_type(N), org,
-                                               &(vtab[0]), &(ref[0]));
+                                                &(vtab[0]), &(ref[0]));
   if (USECOARSE) { // coarse mesh
     base_node org(N); gmm::clear(org);
     std::vector<bgeot::base_small_vector> vtab(N);
     std::vector<size_type> ref(N); std::fill(ref.begin(), ref.end(), NXCOARSE);
-    for (int i = 0; i < N; i++) { 
+    for (int i = 0; i < N; i++) {
       vtab[i] = bgeot::base_small_vector(N); gmm::clear(vtab[i]);
-      (vtab[i])[i] = 
-       ((i == 0) ? LX : ((i == 1) ? LY : LZ)) / scalar_type(NXCOARSE);
+      (vtab[i])[i] = ((i == 0) ? LX
+                               : ((i == 1) ? LY : LZ)) / scalar_type(NXCOARSE);
     getfem::parallelepiped_regular_simplex_mesh(mesh_coarse, dim_type(N), org,
-                                               &(vtab[0]), &(ref[0]));
+                                                &(vtab[0]), &(ref[0]));
   dal::bit_vector nn = mesh.convex_index(dim_type(N));
   char method[500];
   snprintf(method, 499, "FEM_PK(%d, %d)", N, K);
   mim.set_integration_method(nn, bgeot::dim_type(2*K));
   mef.set_finite_element(nn, getfem::fem_descriptor(method));
-                               getfem::fem_descriptor(method));
+                                getfem::fem_descriptor(method));
   mef_data.set_finite_element(nn, getfem::fem_descriptor(method));
@@ -186,9 +185,10 @@ void pb_data::init(bgeot::md_param &params) {
     int k = mesh.structure_of_convex(j)->nb_faces();
     for (short_type i = 0; i < k; i++) {
       if (mesh.is_convex_having_neighbor(j, i)) {
-       gmm::copy(mesh.normal_of_face_of_convex(j, i, 0), un);
-       gmm::scale(un, 1/gmm::vect_norm2(un));
-       if (gmm::abs(un[N-1] - 1.0) < 1.0E-3) mesh.region(0).add(j, i);
+        gmm::copy(mesh.normal_of_face_of_convex(j, i, 0), un);
+        gmm::scale(un, 1/gmm::vect_norm2(un));
+        if (gmm::abs(un[N-1] - 1.0) < 1.0E-3)
+          mesh.region(0).add(j, i);
@@ -198,7 +198,7 @@ void pb_data::assemble() {
   size_type nb_dof = mef.nb_dof();
   std::cout << "number of dof : "<< nb_dof << endl;
   size_type nb_dof_data = mef_data.nb_dof();
   F.resize(nb_dof); gmm::clear(F);
   U.resize(nb_dof); gmm::clear(U);
   gmm::resize(RM, nb_dof, nb_dof);
@@ -214,7 +214,7 @@ void pb_data::assemble() {
   linalg_vector STF(N * nb_dof_data);
   interpolation_function(mef_data, STF, vol_force);
   getfem::asm_source_term(F, mim, mef, mef_data, STF);
   linalg_vector UD(nb_dof);
   for (size_type j = 0; j < nb_dof/N; j++)
     for (size_type k = 0; k < size_type(N); k++) UD[j*N + k] = D[k];
@@ -228,10 +228,10 @@ int pb_data::solve_cg() {
   return int(iter.get_iteration());
+#if defined(GMM_USES_SUPERLU)
 int pb_data::solve_superlu() {
   double rcond;
-  SuperLU_solve(RM, U, F, rcond);
+  gmm::SuperLU_solve(RM, U, F, rcond);
   return 1;
@@ -255,7 +255,7 @@ int pb_data::solve_schwarz(int version) {
   size_type nsd = vB.size();
   cout << "Nomber of sub-domains = " << nsd + (USECOARSE != 0) << endl;
   if (USECOARSE) {
     cout << "interpolation coarse mesh\n";
@@ -264,21 +264,21 @@ int pb_data::solve_schwarz(int version) {
     getfem::interpolation(mef_coarse, mef, vB[nsd], true);
   gmm::iteration iter(residual, 1, 1000000);
   switch (version) {
   case 3 : gmm::additive_schwarz(RM, U, F,
-             gmm::ildlt_precond<general_sparse_matrix>(), vB, iter,
-             gmm::using_cg(), gmm::using_cg());
+           gmm::ildlt_precond<general_sparse_matrix>(), vB, iter,
+           gmm::using_cg(), gmm::using_cg());
   case 4 : gmm::additive_schwarz(RM, U, F,
-             gmm::ilu_precond<general_sparse_matrix>(), vB, iter,
-             gmm::using_gmres(), gmm::using_gmres());
+           gmm::ilu_precond<general_sparse_matrix>(), vB, iter,
+           gmm::using_gmres(), gmm::using_gmres());
+#if defined(GMM_USES_SUPERLU)
   case 5 : gmm::additive_schwarz(RM, U, F,
-             gmm::ilu_precond<general_sparse_matrix>(), vB, iter,
-             gmm::using_superlu(), gmm::using_cg());
+           gmm::ilu_precond<general_sparse_matrix>(), vB, iter,
+           gmm::using_superlu(), gmm::using_cg());
@@ -287,19 +287,19 @@ int pb_data::solve_schwarz(int version) {
 int main(int argc, char *argv[]) {
-#ifdef GMM_USES_MPI
+#if defined(GMM_USES_MPI)
   try {
     bgeot::md_param params;
     pb_data p;
     std::cout << "initialization ...\n";
     params.read_command_line(argc, argv);
     double rutime = gmm::uclock_sec();
@@ -311,7 +311,7 @@ int main(int argc, char *argv[]) {
     cout << "final residual : " << gmm::vect_norm2(p.F) << endl;
-#ifdef GMM_USES_MPI
+#if defined(GMM_USES_MPI)
   return 0;
diff --git a/tests/ b/tests/
index 2b5ade1f..d62c3ed2 100644
--- a/tests/
+++ b/tests/
@@ -34,7 +34,11 @@ int main(int argc, char *argv[]) {
   int ret=0;
 #if defined(GMM_USES_MUMPS)
+  std::string lsolver("mumps");
+  std::string lsolver("superlu");
   bgeot::md_param PARAM;
@@ -140,16 +144,16 @@ int main(int argc, char *argv[]) {
   std::cout<<"SOLVING MODEL 1 (without internal variables)"<<std::endl;
   gmm::iteration iter(1E-9, 1, 30);
   if (DIFFICULTY % 10000 > 999)
-    getfem::standard_solve(md1, iter, getfem::rselect_linear_solver(md1, "mumps"), ls1B);
+    getfem::standard_solve(md1, iter, getfem::rselect_linear_solver(md1, lsolver), ls1B);
-    getfem::standard_solve(md1, iter, getfem::rselect_linear_solver(md1, "mumps"), ls1A);
+    getfem::standard_solve(md1, iter, getfem::rselect_linear_solver(md1, lsolver), ls1A);
   std::cout<<"SOLVING MODEL 2 (with internal variables)"<<std::endl;
   if (DIFFICULTY % 10000 > 999)
-    getfem::standard_solve(md2, iter, getfem::rselect_linear_solver(md2, "mumps"), ls2B);
+    getfem::standard_solve(md2, iter, getfem::rselect_linear_solver(md2, lsolver), ls2B);
-    getfem::standard_solve(md2, iter, getfem::rselect_linear_solver(md2, "mumps"), ls2A);
+    getfem::standard_solve(md2, iter, getfem::rselect_linear_solver(md2, lsolver), ls2A);
   if (debug) {
@@ -184,8 +188,6 @@ int main(int argc, char *argv[]) {
   std::cout<<"Test with difficulty "<<DIFFICULTY<<" returned "<<ret<<std::endl;
   return ret;

reply via email to

[Prev in Thread] Current Thread [Next in Thread]